stella ops usage fixes: roles propagation and timeout, one account to support multi tenants, migrations consolidation, search to support documentation, doctor and open api vector db search

This commit is contained in:
master
2026-02-22 19:27:54 +02:00
parent a29f438f53
commit bd8fee6ed8
373 changed files with 832097 additions and 3369 deletions

View File

@@ -0,0 +1,35 @@
using System.Text.Json;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// Seed metadata describing one doctor check so it can be indexed for knowledge search.
/// Loaded from a JSON seed file by <c>DoctorSearchSeedLoader</c>.
/// </summary>
internal sealed record DoctorSearchSeedEntry(
    string CheckCode,
    string Title,
    string Severity,
    string Description,
    string Remediation,
    string RunCommand,
    IReadOnlyList<string> Symptoms,
    IReadOnlyList<string> Tags,
    IReadOnlyList<string> References);
/// <summary>
/// Loads doctor-check seed entries from a JSON file on disk.
/// Returns an empty list when the file is missing or deserializes to null,
/// filters out entries without a check code, and orders the rest by check code.
/// </summary>
internal static class DoctorSearchSeedLoader
{
    // Web defaults: camelCase property matching, matching the seed file's casing.
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web);

    public static IReadOnlyList<DoctorSearchSeedEntry> Load(string absolutePath)
    {
        if (!File.Exists(absolutePath))
        {
            return [];
        }

        List<DoctorSearchSeedEntry>? parsed;
        using (var seedStream = File.OpenRead(absolutePath))
        {
            parsed = JsonSerializer.Deserialize<List<DoctorSearchSeedEntry>>(seedStream, JsonOptions);
        }

        if (parsed is null)
        {
            return [];
        }

        // Drop entries with no check code, then sort deterministically.
        var valid = parsed.Where(static seed => !string.IsNullOrWhiteSpace(seed.CheckCode));
        return valid
            .OrderBy(static seed => seed.CheckCode, StringComparer.Ordinal)
            .ToList();
    }
}

View File

@@ -0,0 +1,6 @@
namespace StellaOps.AdvisoryAI.KnowledgeSearch;

/// <summary>Rebuilds the knowledge-search index from its configured sources.</summary>
public interface IKnowledgeIndexer
{
    /// <summary>Performs a full index rebuild and returns counts and duration.</summary>
    Task<KnowledgeRebuildSummary> RebuildAsync(CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,6 @@
namespace StellaOps.AdvisoryAI.KnowledgeSearch;

/// <summary>Generates benchmark query datasets for evaluating knowledge search quality.</summary>
public interface IKnowledgeSearchBenchmarkDatasetGenerator
{
    /// <summary>Builds a dataset of query cases from repository docs, OpenAPI specs, and doctor seeds.</summary>
    Task<KnowledgeBenchmarkDataset> GenerateAsync(KnowledgeBenchmarkDatasetOptions options, CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,9 @@
namespace StellaOps.AdvisoryAI.KnowledgeSearch;

/// <summary>Executes a benchmark dataset against the search service and reports recall/latency metrics.</summary>
public interface IKnowledgeSearchBenchmarkRunner
{
    /// <summary>Runs every case in <paramref name="dataset"/> and aggregates the results.</summary>
    Task<KnowledgeBenchmarkRunResult> RunAsync(
        KnowledgeBenchmarkDataset dataset,
        KnowledgeBenchmarkRunOptions options,
        CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,6 @@
namespace StellaOps.AdvisoryAI.KnowledgeSearch;

/// <summary>Entry point for querying the knowledge index (docs, API operations, doctor checks).</summary>
public interface IKnowledgeSearchService
{
    /// <summary>Executes a search and returns ranked results plus diagnostics.</summary>
    Task<KnowledgeSearchResponse> SearchAsync(KnowledgeSearchRequest request, CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,22 @@
namespace StellaOps.AdvisoryAI.KnowledgeSearch;

/// <summary>
/// Storage abstraction for the knowledge index: schema management, atomic index
/// replacement, and the two retrieval paths (full-text and vector candidates).
/// </summary>
internal interface IKnowledgeSearchStore
{
    /// <summary>Creates or migrates the backing schema if it does not exist.</summary>
    Task EnsureSchemaAsync(CancellationToken cancellationToken);

    /// <summary>Replaces the whole index with the contents of <paramref name="snapshot"/>.</summary>
    Task ReplaceIndexAsync(KnowledgeIndexSnapshot snapshot, CancellationToken cancellationToken);

    /// <summary>Runs a full-text search, bounded by <paramref name="take"/> and <paramref name="timeout"/>.</summary>
    Task<IReadOnlyList<KnowledgeChunkRow>> SearchFtsAsync(
        string query,
        KnowledgeSearchFilter? filters,
        int take,
        TimeSpan timeout,
        CancellationToken cancellationToken);

    /// <summary>Loads vector-similarity candidates for <paramref name="queryEmbedding"/>.</summary>
    Task<IReadOnlyList<KnowledgeChunkRow>> LoadVectorCandidatesAsync(
        float[] queryEmbedding,
        KnowledgeSearchFilter? filters,
        int take,
        TimeSpan timeout,
        CancellationToken cancellationToken);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,556 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using System.Collections.Immutable;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// Builds a deterministic benchmark dataset of search queries by scanning the repository:
/// markdown headings under the configured doc roots, OpenAPI operations under the configured
/// spec roots, and doctor-check seed entries. Each discovered target yields several query
/// variants; the result is ordered so repeated runs produce identical datasets.
/// </summary>
internal sealed class KnowledgeSearchBenchmarkDatasetGenerator : IKnowledgeSearchBenchmarkDatasetGenerator
{
    // Matches level-2 and level-3 markdown headings ("## Title" / "### Title").
    private static readonly Regex HeadingPattern = new(
        "^#{2,3}\\s+(.+?)\\s*$",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // HTTP verbs recognized as OpenAPI operation keys under a path item.
    private static readonly string[] OpenApiMethods = ["get", "put", "post", "delete", "patch", "head", "options", "trace"];

    // Fallback doctor seeds used when no seed file is present on disk.
    private static readonly IReadOnlyList<DoctorSearchSeedEntry> DefaultDoctorSeeds =
    [
        new(
            "check.core.disk.space",
            "Disk space availability",
            "high",
            "Low disk space can block ingestion pipelines and worker execution.",
            "Free disk space and verify retention settings.",
            "stella doctor run --check check.core.disk.space",
            ["no space left on device", "disk full", "write failure"],
            ["doctor", "storage", "core"],
            ["docs/operations/devops/runbooks/deployment-upgrade.md"]),
        new(
            "check.core.db.connectivity",
            "PostgreSQL connectivity",
            "high",
            "Doctor failed to connect to PostgreSQL or connection health checks timed out.",
            "Validate credentials, network reachability, and TLS settings.",
            "stella doctor run --check check.core.db.connectivity",
            ["database unavailable", "connection refused", "timeout expired"],
            ["doctor", "database", "connectivity"],
            ["docs/INSTALL_GUIDE.md"]),
        new(
            "check.security.oidc.readiness",
            "OIDC readiness",
            "warn",
            "OIDC prerequisites are missing or identity issuer metadata is not reachable.",
            "Verify issuer URL, JWKS availability, and Authority client configuration.",
            "stella doctor run --check check.security.oidc.readiness",
            ["oidc setup", "invalid issuer", "jwks"],
            ["doctor", "security", "oidc"],
            ["docs/modules/authority/architecture.md"]),
        new(
            "check.router.gateway.routes",
            "Router route registration",
            "warn",
            "Expected gateway routes were not registered or health probes failed.",
            "Inspect route tables and refresh router registration.",
            "stella doctor run --check check.router.gateway.routes",
            ["route missing", "404 on expected endpoint", "gateway routing"],
            ["doctor", "router", "gateway"],
            ["docs/modules/router/README.md"]),
        new(
            "check.integrations.secrets.binding",
            "Integration secret binding",
            "medium",
            "Integration connectors cannot resolve configured secrets.",
            "Validate secret provider configuration and rotate invalid credentials.",
            "stella doctor run --check check.integrations.secrets.binding",
            ["secret missing", "invalid credential", "auth failed"],
            ["doctor", "integrations", "secrets"],
            ["docs/modules/platform/architecture-overview.md"]),
    ];

    private readonly KnowledgeSearchOptions _options;
    private readonly ILogger<KnowledgeSearchBenchmarkDatasetGenerator> _logger;

    public KnowledgeSearchBenchmarkDatasetGenerator(
        IOptions<KnowledgeSearchOptions> options,
        ILogger<KnowledgeSearchBenchmarkDatasetGenerator> logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Scans docs, OpenAPI, and doctor sources, then expands each target into query cases.
    /// Synchronous work wrapped in a completed task (file I/O only).
    /// </summary>
    public Task<KnowledgeBenchmarkDataset> GenerateAsync(
        KnowledgeBenchmarkDatasetOptions options,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(options);
        var repositoryRoot = ResolveRepositoryRoot();
        var targets = new List<BenchmarkTarget>(2048);
        targets.AddRange(LoadMarkdownTargets(repositoryRoot, cancellationToken));
        targets.AddRange(LoadOpenApiTargets(repositoryRoot, cancellationToken));
        targets.AddRange(LoadDoctorTargets(repositoryRoot, cancellationToken));
        if (targets.Count == 0)
        {
            _logger.LogWarning("Knowledge benchmark dataset generation produced no targets.");
            return Task.FromResult(
                new KnowledgeBenchmarkDataset(
                    _options.Product,
                    _options.Version,
                    DateTimeOffset.UtcNow,
                    []));
        }
        // Stable ordering (type, then id) keeps dataset generation deterministic.
        var orderedTargets = targets
            .OrderBy(static target => target.Type, StringComparer.Ordinal)
            .ThenBy(static target => target.TargetId, StringComparer.Ordinal)
            .ToArray();
        var cases = BuildCases(orderedTargets, options, cancellationToken);
        var dataset = new KnowledgeBenchmarkDataset(
            _options.Product,
            _options.Version,
            DateTimeOffset.UtcNow,
            cases);
        return Task.FromResult(dataset);
    }

    /// <summary>Resolves the configured repository root to an absolute path (CWD if unset).</summary>
    private string ResolveRepositoryRoot()
    {
        if (string.IsNullOrWhiteSpace(_options.RepositoryRoot))
        {
            return Directory.GetCurrentDirectory();
        }
        if (Path.IsPathRooted(_options.RepositoryRoot))
        {
            return Path.GetFullPath(_options.RepositoryRoot);
        }
        return Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), _options.RepositoryRoot));
    }

    /// <summary>
    /// One target per level-2/3 heading in each markdown file. Duplicate heading slugs
    /// within a file get a "-N" suffix starting at the second occurrence (slug, slug-2, ...).
    /// NOTE(review): assumes KnowledgeSearchText.Slugify matches the anchor scheme used by
    /// the indexer — confirm the two stay in sync.
    /// </summary>
    private IReadOnlyList<BenchmarkTarget> LoadMarkdownTargets(string repositoryRoot, CancellationToken cancellationToken)
    {
        var targets = new List<BenchmarkTarget>();
        foreach (var filePath in EnumerateMarkdownFiles(repositoryRoot))
        {
            cancellationToken.ThrowIfCancellationRequested();
            var relativePath = ToRelativeRepositoryPath(repositoryRoot, filePath);
            var lines = File.ReadAllLines(filePath);
            var slugCounter = new Dictionary<string, int>(StringComparer.Ordinal);
            foreach (var line in lines)
            {
                if (!TryReadHeading(line, out var heading))
                {
                    continue;
                }
                var slug = KnowledgeSearchText.Slugify(heading);
                var count = slugCounter.TryGetValue(slug, out var existing) ? existing + 1 : 1;
                slugCounter[slug] = count;
                var anchor = count == 1 ? slug : $"{slug}-{count}";
                var title = heading.Trim();
                var id = $"docs:{relativePath}:{anchor}";
                var expected = new KnowledgeBenchmarkExpectedTarget
                {
                    Type = "docs",
                    Path = relativePath,
                    Anchor = anchor,
                };
                var hints = BuildDocQueryHints(title, relativePath);
                targets.Add(new BenchmarkTarget(id, "docs", title, expected, hints));
            }
        }
        return targets;
    }

    /// <summary>
    /// One target per (path, HTTP method) operation in each openapi.json file.
    /// Unreadable or malformed spec files are skipped with a debug log.
    /// </summary>
    private IReadOnlyList<BenchmarkTarget> LoadOpenApiTargets(string repositoryRoot, CancellationToken cancellationToken)
    {
        var targets = new List<BenchmarkTarget>();
        foreach (var filePath in EnumerateOpenApiFiles(repositoryRoot))
        {
            cancellationToken.ThrowIfCancellationRequested();
            JsonDocument document;
            try
            {
                using var stream = File.OpenRead(filePath);
                document = JsonDocument.Parse(stream);
            }
            catch (Exception ex) when (ex is IOException or JsonException)
            {
                _logger.LogDebug(ex, "Skipping benchmark OpenAPI file {Path}.", filePath);
                continue;
            }
            using (document)
            {
                if (!document.RootElement.TryGetProperty("paths", out var pathsElement) || pathsElement.ValueKind != JsonValueKind.Object)
                {
                    continue;
                }
                var relativePath = ToRelativeRepositoryPath(repositoryRoot, filePath);
                var service = DeriveOpenApiServiceName(relativePath, document.RootElement);
                // Paths and methods are enumerated in ordinal order for determinism.
                foreach (var pathProperty in pathsElement.EnumerateObject().OrderBy(static item => item.Name, StringComparer.Ordinal))
                {
                    if (pathProperty.Value.ValueKind != JsonValueKind.Object)
                    {
                        continue;
                    }
                    foreach (var methodProperty in pathProperty.Value.EnumerateObject().OrderBy(static item => item.Name, StringComparer.Ordinal))
                    {
                        if (!OpenApiMethods.Contains(methodProperty.Name, StringComparer.OrdinalIgnoreCase) || methodProperty.Value.ValueKind != JsonValueKind.Object)
                        {
                            continue;
                        }
                        var method = methodProperty.Name.ToUpperInvariant();
                        var path = pathProperty.Name;
                        var operationId = TryGetString(methodProperty.Value, "operationId");
                        var summary = TryGetString(methodProperty.Value, "summary")
                            ?? TryGetString(methodProperty.Value, "description")
                            ?? $"{method} {path}";
                        var title = string.IsNullOrWhiteSpace(operationId) ? $"{method} {path}" : operationId!;
                        // operationId may be null here; the id then ends with a trailing colon,
                        // which matches the reference format used by the benchmark runner.
                        var id = $"api:{service}:{method}:{path}:{operationId}";
                        var expected = new KnowledgeBenchmarkExpectedTarget
                        {
                            Type = "api",
                            Method = method,
                            ApiPath = path,
                            OperationId = operationId,
                        };
                        var hints = BuildApiQueryHints(service, method, path, title, summary, ExtractStringArray(methodProperty.Value, "tags"));
                        targets.Add(new BenchmarkTarget(id, "api", title, expected, hints));
                    }
                }
            }
        }
        return targets;
    }

    /// <summary>
    /// One target per doctor seed entry; falls back to the built-in defaults when
    /// the seed file is absent or empty.
    /// </summary>
    private IReadOnlyList<BenchmarkTarget> LoadDoctorTargets(string repositoryRoot, CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();
        var seedPath = ResolvePath(repositoryRoot, _options.DoctorSeedPath);
        var seeds = DoctorSearchSeedLoader.Load(seedPath);
        if (seeds.Count == 0)
        {
            seeds = DefaultDoctorSeeds;
        }
        var targets = new List<BenchmarkTarget>(seeds.Count);
        foreach (var entry in seeds.OrderBy(static seed => seed.CheckCode, StringComparer.Ordinal))
        {
            cancellationToken.ThrowIfCancellationRequested();
            var expected = new KnowledgeBenchmarkExpectedTarget
            {
                Type = "doctor",
                CheckCode = entry.CheckCode,
            };
            var hints = BuildDoctorQueryHints(entry);
            targets.Add(new BenchmarkTarget(
                $"doctor:{entry.CheckCode}",
                "doctor",
                entry.Title,
                expected,
                hints));
        }
        return targets;
    }

    /// <summary>
    /// Expands targets into query cases (up to VariantsPerTarget per target, globally
    /// deduplicated case-insensitively), then pads with templated variants of existing
    /// cases until MinQueryCount is reached or MaxQueryCount stops growth.
    /// </summary>
    private static IReadOnlyList<KnowledgeBenchmarkCase> BuildCases(
        IReadOnlyList<BenchmarkTarget> targets,
        KnowledgeBenchmarkDatasetOptions options,
        CancellationToken cancellationToken)
    {
        var minCount = Math.Max(1, options.MinQueryCount);
        var maxCount = Math.Max(minCount, options.MaxQueryCount);
        var variantsPerTarget = Math.Clamp(options.VariantsPerTarget, 1, 16);
        var cases = new List<KnowledgeBenchmarkCase>(Math.Min(maxCount, targets.Count * variantsPerTarget * 2));
        var seenQueries = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        foreach (var target in targets)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var added = 0;
            foreach (var query in target.QueryHints)
            {
                var normalized = KnowledgeSearchText.NormalizeWhitespace(query);
                if (string.IsNullOrWhiteSpace(normalized) || !seenQueries.Add(normalized))
                {
                    continue;
                }
                var caseId = KnowledgeSearchText.StableId("bench-case", target.TargetId, added.ToString());
                cases.Add(new KnowledgeBenchmarkCase(caseId, normalized, target.Type, target.Expected));
                added++;
                if (added >= variantsPerTarget || cases.Count >= maxCount)
                {
                    break;
                }
            }
            if (cases.Count >= maxCount)
            {
                break;
            }
        }
        var expansionTemplates = new[]
        {
            "how to fix {0}",
            "troubleshoot {0}",
            "stella ops {0}",
            "{0} remediation",
            "{0} runbook",
        };
        // Round-robin over existing cases/templates to synthesize extra unique queries.
        var expansionIndex = 0;
        while (cases.Count < minCount && cases.Count < maxCount && cases.Count > 0)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var baseCase = cases[expansionIndex % cases.Count];
            var template = expansionTemplates[expansionIndex % expansionTemplates.Length];
            var query = KnowledgeSearchText.NormalizeWhitespace(string.Format(template, baseCase.Query.ToLowerInvariant()));
            expansionIndex++;
            if (!seenQueries.Add(query))
            {
                continue;
            }
            var caseId = KnowledgeSearchText.StableId("bench-case", baseCase.CaseId, expansionIndex.ToString());
            cases.Add(baseCase with
            {
                CaseId = caseId,
                Query = query,
            });
        }
        return cases
            .OrderBy(static item => item.CaseId, StringComparer.Ordinal)
            .ThenBy(static item => item.Query, StringComparer.Ordinal)
            .ToImmutableArray();
    }

    /// <summary>Enumerates *.md files under the configured roots, skipping docs-archived/.</summary>
    private IEnumerable<string> EnumerateMarkdownFiles(string repositoryRoot)
    {
        foreach (var root in _options.MarkdownRoots.Where(static item => !string.IsNullOrWhiteSpace(item)))
        {
            var absoluteRoot = ResolvePath(repositoryRoot, root);
            if (!Directory.Exists(absoluteRoot))
            {
                continue;
            }
            foreach (var filePath in Directory.EnumerateFiles(absoluteRoot, "*.md", SearchOption.AllDirectories))
            {
                var relativePath = ToRelativeRepositoryPath(repositoryRoot, filePath);
                if (relativePath.StartsWith("docs-archived/", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }
                yield return filePath;
            }
        }
    }

    /// <summary>Enumerates files named exactly "openapi.json" under the configured roots.</summary>
    private IEnumerable<string> EnumerateOpenApiFiles(string repositoryRoot)
    {
        foreach (var root in _options.OpenApiRoots.Where(static item => !string.IsNullOrWhiteSpace(item)))
        {
            var absoluteRoot = ResolvePath(repositoryRoot, root);
            if (!Directory.Exists(absoluteRoot))
            {
                continue;
            }
            foreach (var filePath in Directory.EnumerateFiles(absoluteRoot, "openapi.json", SearchOption.AllDirectories))
            {
                yield return filePath;
            }
        }
    }

    /// <summary>Resolves a configured path against the repository root when it is relative.</summary>
    private static string ResolvePath(string repositoryRoot, string configuredPath)
    {
        if (Path.IsPathRooted(configuredPath))
        {
            return Path.GetFullPath(configuredPath);
        }
        return Path.GetFullPath(Path.Combine(repositoryRoot, configuredPath));
    }

    /// <summary>Repo-relative path with forward slashes, regardless of host OS.</summary>
    private static string ToRelativeRepositoryPath(string repositoryRoot, string absolutePath)
    {
        var relative = Path.GetRelativePath(repositoryRoot, absolutePath);
        return relative.Replace('\\', '/');
    }

    /// <summary>Extracts a level-2/3 heading's text (whitespace collapsed); false otherwise.</summary>
    private static bool TryReadHeading(string line, out string heading)
    {
        var match = HeadingPattern.Match(line);
        if (!match.Success)
        {
            heading = string.Empty;
            return false;
        }
        heading = Regex.Replace(match.Groups[1].Value, "\\s+", " ").Trim();
        if (string.IsNullOrWhiteSpace(heading))
        {
            heading = string.Empty;
            return false;
        }
        return true;
    }

    /// <summary>Query variants for a docs heading (title, how-do-i, runbook, path-prefixed).</summary>
    private static IReadOnlyList<string> BuildDocQueryHints(string title, string relativePath)
    {
        var fileName = Path.GetFileNameWithoutExtension(relativePath);
        var hints = new SortedSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            title,
            $"how do i {title.ToLowerInvariant()}",
            $"runbook {title.ToLowerInvariant()}",
            $"{fileName} {title}".ToLowerInvariant(),
            $"{relativePath} {title}".ToLowerInvariant(),
        };
        return hints.ToImmutableArray();
    }

    /// <summary>Query variants for an API operation (verb+path, service, summary, tags).</summary>
    private static IReadOnlyList<string> BuildApiQueryHints(
        string service,
        string method,
        string path,
        string title,
        string summary,
        IReadOnlyList<string> tags)
    {
        var hints = new SortedSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            $"{method} {path}",
            $"{service} {method} {path}",
            title,
            $"endpoint for {summary.ToLowerInvariant()}",
            $"api {service} {summary.ToLowerInvariant()}",
        };
        foreach (var tag in tags.Where(static item => !string.IsNullOrWhiteSpace(item)))
        {
            hints.Add($"{service} endpoint {tag}".ToLowerInvariant());
        }
        return hints.ToImmutableArray();
    }

    /// <summary>Query variants for a doctor check (check code, title, symptoms).</summary>
    private static IReadOnlyList<string> BuildDoctorQueryHints(DoctorSearchSeedEntry entry)
    {
        var hints = new SortedSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            entry.CheckCode,
            $"doctor {entry.Title}".ToLowerInvariant(),
            $"run check {entry.CheckCode}".ToLowerInvariant(),
            entry.Title,
        };
        foreach (var symptom in entry.Symptoms.Where(static item => !string.IsNullOrWhiteSpace(item)))
        {
            hints.Add(symptom.Trim());
        }
        return hints.ToImmutableArray();
    }

    /// <summary>
    /// Service name from info.title when present, otherwise from the spec's parent
    /// directory name, otherwise the literal "service".
    /// </summary>
    private static string DeriveOpenApiServiceName(string relativePath, JsonElement root)
    {
        var title = TryGetNestedString(root, "info", "title");
        if (!string.IsNullOrWhiteSpace(title))
        {
            return KnowledgeSearchText.Slugify(title);
        }
        var segments = relativePath.Split('/', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
        if (segments.Length >= 2)
        {
            return KnowledgeSearchText.Slugify(segments[^2]);
        }
        return "service";
    }

    /// <summary>Reads a string property, or null when absent or not a string.</summary>
    private static string? TryGetString(JsonElement element, string propertyName)
    {
        if (element.ValueKind != JsonValueKind.Object || !element.TryGetProperty(propertyName, out var property))
        {
            return null;
        }
        return property.ValueKind == JsonValueKind.String ? property.GetString() : null;
    }

    /// <summary>Reads parent.child as a string, or null if any step is missing/wrong kind.</summary>
    private static string? TryGetNestedString(JsonElement element, string parentProperty, string childProperty)
    {
        if (element.ValueKind != JsonValueKind.Object ||
            !element.TryGetProperty(parentProperty, out var parent) ||
            parent.ValueKind != JsonValueKind.Object ||
            !parent.TryGetProperty(childProperty, out var child) ||
            child.ValueKind != JsonValueKind.String)
        {
            return null;
        }
        return child.GetString();
    }

    /// <summary>Reads a string array property: trimmed, deduplicated, ordered; empty on mismatch.</summary>
    private static IReadOnlyList<string> ExtractStringArray(JsonElement element, string propertyName)
    {
        if (element.ValueKind != JsonValueKind.Object ||
            !element.TryGetProperty(propertyName, out var property) ||
            property.ValueKind != JsonValueKind.Array)
        {
            return [];
        }
        return property.EnumerateArray()
            .Where(static item => item.ValueKind == JsonValueKind.String)
            .Select(static item => item.GetString())
            .Where(static item => !string.IsNullOrWhiteSpace(item))
            .Select(static item => item!.Trim())
            .Distinct(StringComparer.OrdinalIgnoreCase)
            .OrderBy(static item => item, StringComparer.OrdinalIgnoreCase)
            .ToImmutableArray();
    }

    /// <summary>A discovered searchable item plus the queries expected to find it.</summary>
    private sealed record BenchmarkTarget(
        string TargetId,
        string Type,
        string Title,
        KnowledgeBenchmarkExpectedTarget Expected,
        IReadOnlyList<string> QueryHints);
}

View File

@@ -0,0 +1,63 @@
using System.Text.Json;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// JSON persistence helpers for benchmark datasets and run results.
/// Files are written indented with web (camelCase) naming so artifacts are diff-friendly.
/// </summary>
public static class KnowledgeSearchBenchmarkJson
{
    private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web)
    {
        WriteIndented = true
    };

    /// <summary>Serializes <paramref name="dataset"/> to <paramref name="absolutePath"/>, creating parent directories.</summary>
    /// <exception cref="ArgumentNullException">When <paramref name="dataset"/> is null.</exception>
    /// <exception cref="ArgumentException">When <paramref name="absolutePath"/> is null or whitespace.</exception>
    public static async Task SaveDatasetAsync(
        KnowledgeBenchmarkDataset dataset,
        string absolutePath,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(dataset);
        ArgumentException.ThrowIfNullOrWhiteSpace(absolutePath);
        await WriteJsonAsync(dataset, absolutePath, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>Deserializes a dataset from <paramref name="absolutePath"/>.</summary>
    /// <exception cref="InvalidOperationException">When the file deserializes to null.</exception>
    public static async Task<KnowledgeBenchmarkDataset> LoadDatasetAsync(
        string absolutePath,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(absolutePath);
        await using var stream = File.OpenRead(absolutePath);
        var dataset = await JsonSerializer.DeserializeAsync<KnowledgeBenchmarkDataset>(stream, JsonOptions, cancellationToken).ConfigureAwait(false);
        if (dataset is null)
        {
            throw new InvalidOperationException($"Failed to deserialize benchmark dataset from {absolutePath}.");
        }
        return dataset;
    }

    /// <summary>Serializes <paramref name="result"/> to <paramref name="absolutePath"/>, creating parent directories.</summary>
    /// <exception cref="ArgumentNullException">When <paramref name="result"/> is null.</exception>
    /// <exception cref="ArgumentException">When <paramref name="absolutePath"/> is null or whitespace.</exception>
    public static async Task SaveRunResultAsync(
        KnowledgeBenchmarkRunResult result,
        string absolutePath,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(result);
        ArgumentException.ThrowIfNullOrWhiteSpace(absolutePath);
        await WriteJsonAsync(result, absolutePath, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Shared write path: ensures the destination directory exists, then serializes
    /// <paramref name="value"/> to the file (overwriting any existing content).
    /// </summary>
    private static async Task WriteJsonAsync<T>(T value, string absolutePath, CancellationToken cancellationToken)
    {
        var directory = Path.GetDirectoryName(absolutePath);
        if (!string.IsNullOrWhiteSpace(directory))
        {
            Directory.CreateDirectory(directory);
        }
        await using var stream = File.Create(absolutePath);
        await JsonSerializer.SerializeAsync(stream, value, JsonOptions, cancellationToken).ConfigureAwait(false);
    }
}

View File

@@ -0,0 +1,79 @@
using System.Text.Json.Serialization;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>Controls dataset size: min/max total queries and query variants per target.</summary>
public sealed record KnowledgeBenchmarkDatasetOptions
{
    public int MinQueryCount { get; init; } = 1000;
    public int MaxQueryCount { get; init; } = 10000;
    // Clamped to [1, 16] by the generator.
    public int VariantsPerTarget { get; init; } = 8;
}
/// <summary>A generated benchmark dataset: product/version stamp plus the query cases.</summary>
public sealed record KnowledgeBenchmarkDataset(
    string Product,
    string Version,
    DateTimeOffset GeneratedAtUtc,
    IReadOnlyList<KnowledgeBenchmarkCase> Cases);
/// <summary>One benchmark query and the target it is expected to surface.</summary>
public sealed record KnowledgeBenchmarkCase(
    string CaseId,
    string Query,
    string Type,
    KnowledgeBenchmarkExpectedTarget Expected);
/// <summary>
/// Expected match for a case. Which properties are populated depends on Type:
/// "docs" uses Path/Anchor, "api" uses Method/ApiPath/OperationId, "doctor" uses CheckCode.
/// </summary>
public sealed record KnowledgeBenchmarkExpectedTarget
{
    public string Type { get; init; } = "docs";
    public string? Path { get; init; }
    public string? Anchor { get; init; }
    public string? Method { get; init; }
    // Serialized as "pathTemplate" to avoid clashing with the docs Path property.
    [JsonPropertyName("pathTemplate")]
    public string? ApiPath { get; init; }
    public string? OperationId { get; init; }
    public string? CheckCode { get; init; }
}
/// <summary>Run configuration: result depth, case limit, repeat passes, and pass/fail thresholds.</summary>
public sealed record KnowledgeBenchmarkRunOptions
{
    public int TopK { get; init; } = 5;
    // Null means run every case in the dataset.
    public int? MaxQueries { get; init; }
    public int StabilityPasses { get; init; } = 2;
    public double MinRecallAtK { get; init; } = 0.85d;
    public double MaxP95LatencyMs { get; init; } = 500d;
}
/// <summary>Aggregated benchmark outcome: overall and per-type metrics, stability, and per-case detail.</summary>
public sealed record KnowledgeBenchmarkRunResult(
    KnowledgeBenchmarkMetrics Overall,
    IReadOnlyDictionary<string, KnowledgeBenchmarkMetrics> ByType,
    double StabilityRate,
    bool Passed,
    IReadOnlyList<KnowledgeBenchmarkCaseResult> Cases);
/// <summary>Recall and latency percentiles over a set of benchmark cases.</summary>
public sealed record KnowledgeBenchmarkMetrics(
    int Total,
    int Matched,
    double RecallAtK,
    double P50LatencyMs,
    double P95LatencyMs);
/// <summary>Outcome of a single case. Rank is 1-based; 0 means the expected target was not found.</summary>
public sealed record KnowledgeBenchmarkCaseResult(
    string CaseId,
    string Query,
    string Type,
    bool Matched,
    int Rank,
    double LatencyMs,
    string? TopReference,
    string ExpectedReference);

View File

@@ -0,0 +1,213 @@
using Microsoft.Extensions.Logging;
using System.Collections.Immutable;
using System.Diagnostics;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// Executes benchmark cases against the search service, computing recall@K, latency
/// percentiles (nearest-rank), and a stability rate across repeated passes.
/// </summary>
internal sealed class KnowledgeSearchBenchmarkRunner : IKnowledgeSearchBenchmarkRunner
{
    private readonly IKnowledgeSearchService _searchService;
    private readonly ILogger<KnowledgeSearchBenchmarkRunner> _logger;

    public KnowledgeSearchBenchmarkRunner(
        IKnowledgeSearchService searchService,
        ILogger<KnowledgeSearchBenchmarkRunner> logger)
    {
        _searchService = searchService ?? throw new ArgumentNullException(nameof(searchService));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <summary>
    /// Runs the dataset (optionally truncated to MaxQueries). The first pass produces the
    /// metrics; additional passes (StabilityPasses - 1 of them) only measure how often the
    /// top hit is reproduced. Passed requires both the recall and p95 thresholds.
    /// </summary>
    public async Task<KnowledgeBenchmarkRunResult> RunAsync(
        KnowledgeBenchmarkDataset dataset,
        KnowledgeBenchmarkRunOptions options,
        CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(dataset);
        ArgumentNullException.ThrowIfNull(options);
        var topK = Math.Clamp(options.TopK, 1, 100);
        var cases = options.MaxQueries.HasValue
            ? dataset.Cases.Take(Math.Max(1, options.MaxQueries.Value)).ToArray()
            : dataset.Cases.ToArray();
        if (cases.Length == 0)
        {
            // An empty dataset cannot demonstrate recall, so it never passes.
            var empty = new KnowledgeBenchmarkMetrics(0, 0, 0d, 0d, 0d);
            return new KnowledgeBenchmarkRunResult(empty, new Dictionary<string, KnowledgeBenchmarkMetrics>(StringComparer.Ordinal), 1d, false, []);
        }
        var firstPass = await ExecutePassAsync(cases, topK, cancellationToken).ConfigureAwait(false);
        double stabilityRate;
        if (options.StabilityPasses > 1)
        {
            // Run every configured repeat pass (the original implementation ran only one,
            // ignoring StabilityPasses > 2) and compare each top hit against the first pass.
            var stableComparisons = 0;
            var totalComparisons = 0;
            for (var pass = 1; pass < options.StabilityPasses; pass++)
            {
                var repeatPass = await ExecutePassAsync(cases, topK, cancellationToken).ConfigureAwait(false);
                for (var index = 0; index < firstPass.Count; index++)
                {
                    totalComparisons++;
                    if (string.Equals(firstPass[index].TopReference, repeatPass[index].TopReference, StringComparison.Ordinal))
                    {
                        stableComparisons++;
                    }
                }
            }
            stabilityRate = stableComparisons / (double)totalComparisons;
        }
        else
        {
            // A single pass is trivially "stable".
            stabilityRate = 1d;
        }
        var overall = BuildMetrics(firstPass);
        var byType = firstPass
            .GroupBy(static item => item.Type, StringComparer.Ordinal)
            .OrderBy(static item => item.Key, StringComparer.Ordinal)
            .ToDictionary(
                static group => group.Key,
                static group => BuildMetrics(group.ToArray()),
                StringComparer.Ordinal);
        var passed = overall.RecallAtK >= options.MinRecallAtK &&
            overall.P95LatencyMs <= options.MaxP95LatencyMs;
        _logger.LogInformation(
            "Knowledge benchmark completed. cases={Cases}, recall@k={RecallAtK:F4}, p95={P95:F2}ms, stability={Stability:F4}, passed={Passed}",
            firstPass.Count,
            overall.RecallAtK,
            overall.P95LatencyMs,
            stabilityRate,
            passed);
        return new KnowledgeBenchmarkRunResult(overall, byType, stabilityRate, passed, firstPass);
    }

    /// <summary>
    /// Runs every case once, recording the 1-based rank of the expected target (0 if absent)
    /// and the latency. Service-reported duration is preferred; the local stopwatch is the fallback.
    /// </summary>
    private async Task<IReadOnlyList<KnowledgeBenchmarkCaseResult>> ExecutePassAsync(
        IReadOnlyList<KnowledgeBenchmarkCase> cases,
        int topK,
        CancellationToken cancellationToken)
    {
        var results = new List<KnowledgeBenchmarkCaseResult>(cases.Count);
        foreach (var testCase in cases)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var stopwatch = Stopwatch.StartNew();
            var response = await _searchService.SearchAsync(
                new KnowledgeSearchRequest(testCase.Query, topK, null, IncludeDebug: false),
                cancellationToken)
                .ConfigureAwait(false);
            stopwatch.Stop();
            var expectedReference = BuildExpectedReference(testCase.Expected);
            var topReference = response.Results.Count > 0 ? BuildResultReference(response.Results[0]) : null;
            var rank = 0;
            for (var index = 0; index < response.Results.Count; index++)
            {
                if (IsMatch(response.Results[index], testCase.Expected))
                {
                    rank = index + 1;
                    break;
                }
            }
            var latencyMs = response.Diagnostics.DurationMs > 0
                ? response.Diagnostics.DurationMs
                : stopwatch.Elapsed.TotalMilliseconds;
            results.Add(new KnowledgeBenchmarkCaseResult(
                testCase.CaseId,
                testCase.Query,
                testCase.Type,
                rank > 0,
                rank,
                latencyMs,
                topReference,
                expectedReference));
        }
        return results.ToImmutableArray();
    }

    /// <summary>Aggregates match rate and p50/p95 latencies for a slice of case results.</summary>
    private static KnowledgeBenchmarkMetrics BuildMetrics(IReadOnlyList<KnowledgeBenchmarkCaseResult> cases)
    {
        if (cases.Count == 0)
        {
            return new KnowledgeBenchmarkMetrics(0, 0, 0d, 0d, 0d);
        }
        var matched = cases.Count(static item => item.Matched);
        var latencies = cases
            .Select(static item => item.LatencyMs)
            .OrderBy(static item => item)
            .ToArray();
        return new KnowledgeBenchmarkMetrics(
            cases.Count,
            matched,
            matched / (double)cases.Count,
            Percentile(latencies, 0.5d),
            Percentile(latencies, 0.95d));
    }

    /// <summary>Nearest-rank percentile over an already-sorted list; 0 for an empty list.</summary>
    private static double Percentile(IReadOnlyList<double> sortedValues, double percentile)
    {
        if (sortedValues.Count == 0)
        {
            return 0d;
        }
        var clamped = Math.Clamp(percentile, 0d, 1d);
        var index = (int)Math.Ceiling(sortedValues.Count * clamped) - 1;
        index = Math.Clamp(index, 0, sortedValues.Count - 1);
        return sortedValues[index];
    }

    /// <summary>
    /// Type-specific equality between a search result and the expected target.
    /// Docs paths and API paths compare ordinally; anchors, methods, operation ids,
    /// and check codes compare case-insensitively.
    /// </summary>
    private static bool IsMatch(KnowledgeSearchResult result, KnowledgeBenchmarkExpectedTarget expected)
    {
        if (!result.Type.Equals(expected.Type, StringComparison.OrdinalIgnoreCase))
        {
            return false;
        }
        return expected.Type switch
        {
            "docs" => result.Open.Docs is not null &&
                string.Equals(result.Open.Docs.Path, expected.Path, StringComparison.Ordinal) &&
                string.Equals(result.Open.Docs.Anchor, expected.Anchor, StringComparison.OrdinalIgnoreCase),
            "api" => result.Open.Api is not null &&
                string.Equals(result.Open.Api.Method, expected.Method, StringComparison.OrdinalIgnoreCase) &&
                string.Equals(result.Open.Api.Path, expected.ApiPath, StringComparison.Ordinal) &&
                (string.IsNullOrWhiteSpace(expected.OperationId) ||
                    string.Equals(result.Open.Api.OperationId, expected.OperationId, StringComparison.OrdinalIgnoreCase)),
            "doctor" => result.Open.Doctor is not null &&
                string.Equals(result.Open.Doctor.CheckCode, expected.CheckCode, StringComparison.OrdinalIgnoreCase),
            _ => false,
        };
    }

    /// <summary>Canonical reference string for a search result (used for stability comparison).</summary>
    private static string BuildResultReference(KnowledgeSearchResult result)
    {
        return result.Type switch
        {
            "docs" when result.Open.Docs is not null =>
                $"docs:{result.Open.Docs.Path}#{result.Open.Docs.Anchor}",
            "api" when result.Open.Api is not null =>
                $"api:{result.Open.Api.Method}:{result.Open.Api.Path}:{result.Open.Api.OperationId}",
            "doctor" when result.Open.Doctor is not null =>
                $"doctor:{result.Open.Doctor.CheckCode}",
            _ => result.Title,
        };
    }

    /// <summary>Canonical reference string for an expected target, mirroring BuildResultReference.</summary>
    private static string BuildExpectedReference(KnowledgeBenchmarkExpectedTarget expected)
    {
        return expected.Type switch
        {
            "docs" => $"docs:{expected.Path}#{expected.Anchor}",
            "api" => $"api:{expected.Method}:{expected.ApiPath}:{expected.OperationId}",
            "doctor" => $"doctor:{expected.CheckCode}",
            _ => expected.Type,
        };
    }
}

View File

@@ -0,0 +1,164 @@
using System.Collections.Immutable;
using System.Text.Json;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>A search query: text, optional result count (K), optional filters, and a debug flag.</summary>
public sealed record KnowledgeSearchRequest(
    string Q,
    int? K = null,
    KnowledgeSearchFilter? Filters = null,
    bool IncludeDebug = false);
/// <summary>Optional narrowing criteria; null properties mean "no filter" for that dimension.</summary>
public sealed record KnowledgeSearchFilter
{
    public IReadOnlyList<string>? Type { get; init; }
    public string? Product { get; init; }
    public string? Version { get; init; }
    public string? Service { get; init; }
    public IReadOnlyList<string>? Tags { get; init; }
}
/// <summary>Search response: the echoed query/K, ranked results, and execution diagnostics.</summary>
public sealed record KnowledgeSearchResponse(
    string Query,
    int TopK,
    IReadOnlyList<KnowledgeSearchResult> Results,
    KnowledgeSearchDiagnostics Diagnostics);
/// <summary>One ranked hit with its snippet, score, open action, and optional debug payload.</summary>
public sealed record KnowledgeSearchResult(
    string Type,
    string Title,
    string Snippet,
    double Score,
    KnowledgeOpenAction Open,
    IReadOnlyDictionary<string, string>? Debug = null);
/// <summary>Discriminated open action: exactly one of Docs/Api/Doctor is populated per Kind.</summary>
public sealed record KnowledgeOpenAction(
    KnowledgeOpenActionType Kind,
    KnowledgeOpenDocAction? Docs = null,
    KnowledgeOpenApiAction? Api = null,
    KnowledgeOpenDoctorAction? Doctor = null);
/// <summary>Kind of open action carried by a result; values start at 1 so 0 is never a valid default.</summary>
public enum KnowledgeOpenActionType
{
    Docs = 1,
    Api = 2,
    Doctor = 3
}
/// <summary>Opens a documentation file at an anchor, with a character span into the doc.</summary>
public sealed record KnowledgeOpenDocAction(
    string Path,
    string Anchor,
    int SpanStart,
    int SpanEnd);
/// <summary>Opens an API operation identified by service, HTTP method, path, and operation id.</summary>
public sealed record KnowledgeOpenApiAction(
    string Service,
    string Method,
    string Path,
    string OperationId);
/// <summary>Opens a doctor check; CanRun and RunCommand let a client offer one-click execution.</summary>
public sealed record KnowledgeOpenDoctorAction(
    string CheckCode,
    string Severity,
    bool CanRun,
    string RunCommand);
/// <summary>Execution diagnostics: candidate counts per retrieval path, duration, and mode.</summary>
public sealed record KnowledgeSearchDiagnostics(
    int FtsMatches,
    int VectorMatches,
    long DurationMs,
    bool UsedVector,
    string Mode);
/// <summary>An indexed source document; ContentHash supports change detection on rebuild.</summary>
internal sealed record KnowledgeSourceDocument(
    string DocId,
    string DocType,
    string Product,
    string Version,
    string SourceRef,
    string Path,
    string Title,
    string ContentHash,
    JsonDocument Metadata);
/// <summary>A searchable chunk of a document, with its span, body text, and optional embedding.</summary>
internal sealed record KnowledgeChunkDocument(
    string ChunkId,
    string DocId,
    string Kind,
    string? Anchor,
    string? SectionPath,
    int SpanStart,
    int SpanEnd,
    string Title,
    string Body,
    float[]? Embedding,
    JsonDocument Metadata);
/// <summary>An ingested OpenAPI specification; Content holds the parsed spec JSON.</summary>
internal sealed record KnowledgeApiSpec(
    string SpecId,
    string DocId,
    string Service,
    string? OpenApiVersion,
    string? Title,
    string? Version,
    string SourcePath,
    JsonDocument Content);
/// <summary>One HTTP operation extracted from a spec, linked to its owning spec and chunk.</summary>
internal sealed record KnowledgeApiOperation(
    string OperationKey,
    string SpecId,
    string ChunkId,
    string Service,
    string Method,
    string Path,
    string? OperationId,
    IReadOnlyList<string> Tags,
    string? Summary,
    JsonDocument RequestJson,
    JsonDocument ResponsesJson,
    JsonDocument SecurityJson);
/// <summary>A doctor check projected into the index, linked to its search chunk.</summary>
internal sealed record KnowledgeDoctorProjection(
    string CheckCode,
    string ChunkId,
    string Title,
    string Severity,
    string Remediation,
    string RunCommand,
    IReadOnlyList<string> Symptoms,
    JsonDocument ReferencesJson,
    JsonDocument MetadataJson);
/// <summary>Complete index contents, passed to the store for atomic replacement.</summary>
internal sealed record KnowledgeIndexSnapshot(
    IReadOnlyList<KnowledgeSourceDocument> Documents,
    IReadOnlyList<KnowledgeChunkDocument> Chunks,
    IReadOnlyList<KnowledgeApiSpec> ApiSpecs,
    IReadOnlyList<KnowledgeApiOperation> ApiOperations,
    IReadOnlyList<KnowledgeDoctorProjection> DoctorProjections);
/// <summary>Counts and duration of an index rebuild, returned by <c>IKnowledgeIndexer</c>.</summary>
public sealed record KnowledgeRebuildSummary(
    int DocumentCount,
    int ChunkCount,
    int ApiSpecCount,
    int ApiOperationCount,
    int DoctorProjectionCount,
    long DurationMs);
internal sealed record KnowledgeChunkRow(
string ChunkId,
string DocId,
string Kind,
string? Anchor,
string? SectionPath,
int SpanStart,
int SpanEnd,
string Title,
string Body,
string Snippet,
JsonDocument Metadata,
float[]? Embedding,
double LexicalScore);

View File

@@ -0,0 +1,48 @@
using System.ComponentModel.DataAnnotations;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// Configuration for the AdvisoryAI knowledge-search feature, bound from the
/// "AdvisoryAI:KnowledgeSearch" configuration section with data-annotation
/// validation.
/// </summary>
public sealed class KnowledgeSearchOptions
{
    /// <summary>Configuration section this options class binds to.</summary>
    public const string SectionName = "AdvisoryAI:KnowledgeSearch";

    /// <summary>Master switch; when false the search service returns empty results (mode "disabled").</summary>
    public bool Enabled { get; set; } = true;

    /// <summary>Postgres connection string; search is also disabled when this is empty.</summary>
    public string ConnectionString { get; set; } = string.Empty;

    /// <summary>Product label stamped onto indexed documents.</summary>
    public string Product { get; set; } = "stella-ops";

    /// <summary>Version label stamped onto indexed documents.</summary>
    public string Version { get; set; } = "local";

    /// <summary>Root directory used to resolve the relative content roots below.</summary>
    public string RepositoryRoot { get; set; } = ".";

    /// <summary>Result count used when a request does not specify K.</summary>
    [Range(1, 384)]
    public int DefaultTopK { get; set; } = 10;

    /// <summary>Queries longer than this are truncated before searching.</summary>
    [Range(1, 2000)]
    public int MaxQueryLength { get; set; } = 512;

    /// <summary>Dimensionality that query embeddings are padded/truncated to.</summary>
    [Range(16, 2048)]
    public int VectorDimensions { get; set; } = 384;

    /// <summary>Minimum candidate count requested from the full-text stage.</summary>
    [Range(10, 5000)]
    public int FtsCandidateCount { get; set; } = 120;

    /// <summary>Row limit for the vector candidate scan.</summary>
    [Range(10, 10000)]
    public int VectorScanLimit { get; set; } = 3000;

    /// <summary>Number of vector candidates kept after in-process similarity ranking.</summary>
    [Range(10, 5000)]
    public int VectorCandidateCount { get; set; } = 120;

    /// <summary>Per-query database timeout in milliseconds (the service floors this at 250).</summary>
    [Range(250, 30000)]
    public int QueryTimeoutMs { get; set; } = 3000;

    /// <summary>Optional doctor-checks endpoint. NOTE(review): consumer not visible in this file — confirm usage.</summary>
    public string DoctorChecksEndpoint { get; set; } = string.Empty;

    /// <summary>Repo-relative path of the doctor search seed JSON file.</summary>
    public string DoctorSeedPath { get; set; } =
        "src/AdvisoryAI/StellaOps.AdvisoryAI/KnowledgeSearch/doctor-search-seed.json";

    /// <summary>Directories (relative to RepositoryRoot) scanned for markdown documentation.</summary>
    public List<string> MarkdownRoots { get; set; } = ["docs"];

    /// <summary>Directories (relative to RepositoryRoot) scanned for OpenAPI specs.</summary>
    public List<string> OpenApiRoots { get; set; } = ["src", "devops/compose"];
}

View File

@@ -0,0 +1,394 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using StellaOps.AdvisoryAI.Vectorization;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using System.Text.Json;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// Hybrid knowledge search: runs a Postgres full-text stage and a best-effort
/// vector-similarity stage, fuses both rankings with reciprocal-rank fusion
/// (RRF), applies small metadata-driven boosts, and maps the top-K rows into
/// API results.
/// </summary>
internal sealed class KnowledgeSearchService : IKnowledgeSearchService
{
    // RRF denominator constant: each ranking stage contributes 1 / (60 + rank).
    private const int ReciprocalRankConstant = 60;

    // Extracts an HTTP "METHOD /path" fragment (e.g. "GET /api/items") from the query text.
    private static readonly Regex MethodPathPattern = new("\\b(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|TRACE)\\s+(/[^\\s]+)", RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);

    private readonly KnowledgeSearchOptions _options;
    private readonly IKnowledgeSearchStore _store;
    private readonly IVectorEncoder _vectorEncoder;
    private readonly ILogger<KnowledgeSearchService> _logger;
    private readonly TimeProvider _timeProvider;

    public KnowledgeSearchService(
        IOptions<KnowledgeSearchOptions> options,
        IKnowledgeSearchStore store,
        IVectorEncoder vectorEncoder,
        ILogger<KnowledgeSearchService> logger,
        TimeProvider timeProvider)
    {
        ArgumentNullException.ThrowIfNull(options);
        _options = options.Value ?? new KnowledgeSearchOptions();
        _store = store ?? throw new ArgumentNullException(nameof(store));
        _vectorEncoder = vectorEncoder ?? throw new ArgumentNullException(nameof(vectorEncoder));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
        _timeProvider = timeProvider ?? throw new ArgumentNullException(nameof(timeProvider));
    }

    /// <summary>
    /// Executes a hybrid search. Short-circuits with an empty response when the
    /// query normalizes to empty (mode "empty") or the feature is disabled /
    /// unconfigured (mode "disabled"); otherwise returns fused results with
    /// diagnostics (mode "hybrid" when the vector stage produced candidates,
    /// "fts-only" otherwise).
    /// </summary>
    public async Task<KnowledgeSearchResponse> SearchAsync(KnowledgeSearchRequest request, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(request);
        var startedAt = _timeProvider.GetUtcNow();
        var query = NormalizeQuery(request.Q);
        if (string.IsNullOrWhiteSpace(query))
        {
            return new KnowledgeSearchResponse(
                string.Empty,
                ResolveTopK(request.K),
                [],
                new KnowledgeSearchDiagnostics(0, 0, 0, false, "empty"));
        }
        if (!_options.Enabled || string.IsNullOrWhiteSpace(_options.ConnectionString))
        {
            return new KnowledgeSearchResponse(
                query,
                ResolveTopK(request.K),
                [],
                new KnowledgeSearchDiagnostics(0, 0, 0, false, "disabled"));
        }
        var topK = ResolveTopK(request.K);
        // Per-query DB timeout, floored at 250ms regardless of configuration.
        var timeout = TimeSpan.FromMilliseconds(Math.Max(250, _options.QueryTimeoutMs));
        // Lexical stage: fetch at least FtsCandidateCount candidates (never fewer than topK).
        var ftsRows = await _store.SearchFtsAsync(
            query,
            request.Filters,
            Math.Max(topK, _options.FtsCandidateCount),
            timeout,
            cancellationToken).ConfigureAwait(false);
        // 1-based rank positions keyed by chunk id, consumed by RRF fusion below.
        var lexicalRanks = ftsRows
            .Select((row, index) => (row.ChunkId, Rank: index + 1, Row: row))
            .ToDictionary(static item => item.ChunkId, static item => item, StringComparer.Ordinal);
        var vectorRows = Array.Empty<(KnowledgeChunkRow Row, int Rank, double Score)>();
        var usedVector = false;
        // Vector stage is best-effort: any failure degrades to lexical-only results.
        try
        {
            var queryEmbedding = EncodeQueryEmbedding(query);
            if (queryEmbedding.Length > 0)
            {
                var candidates = await _store.LoadVectorCandidatesAsync(
                    queryEmbedding,
                    request.Filters,
                    Math.Max(topK, _options.VectorScanLimit),
                    timeout,
                    cancellationToken).ConfigureAwait(false);
                // Score candidates in-process by cosine similarity; ties broken by chunk id for determinism.
                var rankedVectors = candidates
                    .Select(row => (Row: row, Score: row.Embedding is { Length: > 0 }
                        ? KnowledgeSearchText.CosineSimilarity(queryEmbedding, row.Embedding)
                        : 0d))
                    .Where(static item => item.Score > 0d)
                    .OrderByDescending(static item => item.Score)
                    .ThenBy(static item => item.Row.ChunkId, StringComparer.Ordinal)
                    .Take(Math.Max(topK, _options.VectorCandidateCount))
                    .Select((item, index) => (item.Row, Rank: index + 1, item.Score))
                    .ToArray();
                vectorRows = rankedVectors;
                usedVector = rankedVectors.Length > 0;
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Knowledge search vector stage failed; continuing with lexical results only.");
        }
        var merged = FuseRanks(query, request, lexicalRanks, vectorRows);
        var results = merged
            .Take(topK)
            .Select(item => BuildResult(item.Row, query, item.Score, request.IncludeDebug, item.Debug))
            .ToArray();
        var duration = _timeProvider.GetUtcNow() - startedAt;
        return new KnowledgeSearchResponse(
            query,
            topK,
            results,
            new KnowledgeSearchDiagnostics(
                ftsRows.Count,
                vectorRows.Length,
                (long)duration.TotalMilliseconds,
                usedVector,
                usedVector ? "hybrid" : "fts-only"));
    }

    /// <summary>
    /// Reciprocal-rank fusion: sums 1/(60+rank) contributions from the lexical
    /// and vector rankings per chunk, adds metadata boosts, and returns rows
    /// ordered by fused score (ties broken by kind, then chunk id). The Debug
    /// map records per-stage ranks/scores for IncludeDebug responses.
    /// </summary>
    private IReadOnlyList<(KnowledgeChunkRow Row, double Score, IReadOnlyDictionary<string, string> Debug)> FuseRanks(
        string query,
        KnowledgeSearchRequest request,
        IReadOnlyDictionary<string, (string ChunkId, int Rank, KnowledgeChunkRow Row)> lexicalRanks,
        IReadOnlyList<(KnowledgeChunkRow Row, int Rank, double Score)> vectorRanks)
    {
        var merged = new Dictionary<string, (KnowledgeChunkRow Row, double Score, Dictionary<string, string> Debug)>(StringComparer.Ordinal);
        foreach (var lexical in lexicalRanks.Values)
        {
            var score = ReciprocalRank(lexical.Rank);
            var debug = new Dictionary<string, string>(StringComparer.Ordinal)
            {
                ["lexicalRank"] = lexical.Rank.ToString(),
                ["lexicalScore"] = lexical.Row.LexicalScore.ToString("F6", System.Globalization.CultureInfo.InvariantCulture)
            };
            merged[lexical.ChunkId] = (lexical.Row, score, debug);
        }
        foreach (var vector in vectorRanks)
        {
            if (!merged.TryGetValue(vector.Row.ChunkId, out var existing))
            {
                existing = (vector.Row, 0d, new Dictionary<string, string>(StringComparer.Ordinal));
            }
            // 'existing' is a value tuple (local copy); the mutated copy is written back below.
            existing.Score += ReciprocalRank(vector.Rank);
            existing.Debug["vectorRank"] = vector.Rank.ToString();
            existing.Debug["vectorScore"] = vector.Score.ToString("F6", System.Globalization.CultureInfo.InvariantCulture);
            merged[vector.Row.ChunkId] = existing;
        }
        var ranked = merged.Values
            .Select(item =>
            {
                var boost = ComputeBoost(query, request.Filters, item.Row);
                item.Score += boost;
                item.Debug["boost"] = boost.ToString("F6", System.Globalization.CultureInfo.InvariantCulture);
                item.Debug["chunkId"] = item.Row.ChunkId;
                return item;
            })
            .OrderByDescending(static item => item.Score)
            .ThenBy(static item => item.Row.Kind, StringComparer.Ordinal)
            .ThenBy(static item => item.Row.ChunkId, StringComparer.Ordinal)
            .Select(item => (item.Row, item.Score, (IReadOnlyDictionary<string, string>)item.Debug))
            .ToArray();
        return ranked;
    }

    /// <summary>
    /// Additive score boost derived from exact-match heuristics:
    /// doctor check-code match +1.4; API operationId match +1.2; exact
    /// "METHOD /path" match +1.3 (substring match +0.3); exact title match
    /// +0.4; matching service filter +0.2; any matching tag filter +0.15.
    /// </summary>
    private double ComputeBoost(string query, KnowledgeSearchFilter? filters, KnowledgeChunkRow row)
    {
        var normalizedQuery = query.Trim();
        var lowerQuery = normalizedQuery.ToLowerInvariant();
        var metadata = row.Metadata.RootElement;
        var boost = 0d;
        if (row.Kind.Equals("doctor_check", StringComparison.OrdinalIgnoreCase))
        {
            var checkCode = GetMetadataString(metadata, "checkCode");
            if (!string.IsNullOrWhiteSpace(checkCode) && checkCode.Equals(normalizedQuery, StringComparison.OrdinalIgnoreCase))
            {
                boost += 1.4d;
            }
        }
        if (row.Kind.Equals("api_operation", StringComparison.OrdinalIgnoreCase))
        {
            var operationId = GetMetadataString(metadata, "operationId");
            if (!string.IsNullOrWhiteSpace(operationId) && operationId.Equals(normalizedQuery, StringComparison.OrdinalIgnoreCase))
            {
                boost += 1.2d;
            }
            var method = GetMetadataString(metadata, "method");
            var path = GetMetadataString(metadata, "path");
            if (!string.IsNullOrWhiteSpace(method) && !string.IsNullOrWhiteSpace(path))
            {
                // Path comparison is case-sensitive (Ordinal); method is case-insensitive.
                var match = MethodPathPattern.Match(query);
                if (match.Success &&
                    method.Equals(match.Groups[1].Value, StringComparison.OrdinalIgnoreCase) &&
                    path.Equals(match.Groups[2].Value, StringComparison.Ordinal))
                {
                    boost += 1.3d;
                }
                var methodPath = $"{method} {path}".ToLowerInvariant();
                if (lowerQuery.Contains(methodPath, StringComparison.Ordinal))
                {
                    boost += 0.3d;
                }
            }
        }
        if (!string.IsNullOrWhiteSpace(row.Title) && row.Title.Equals(normalizedQuery, StringComparison.OrdinalIgnoreCase))
        {
            boost += 0.4d;
        }
        if (filters is not null)
        {
            if (!string.IsNullOrWhiteSpace(filters.Service))
            {
                var rowService = GetMetadataString(metadata, "service");
                if (!string.IsNullOrWhiteSpace(rowService) && rowService.Equals(filters.Service, StringComparison.OrdinalIgnoreCase))
                {
                    boost += 0.2d;
                }
            }
            if (filters.Tags is { Count: > 0 })
            {
                var rowTags = GetMetadataStringArray(metadata, "tags");
                if (filters.Tags.Any(filterTag => rowTags.Contains(filterTag, StringComparer.OrdinalIgnoreCase)))
                {
                    boost += 0.15d;
                }
            }
        }
        return boost;
    }

    /// <summary>
    /// Maps a ranked chunk row to an API result: derives the result type from
    /// the chunk kind (api_operation → "api", doctor_check → "doctor",
    /// otherwise "docs"), picks/build a snippet, and constructs the matching
    /// open action from chunk metadata with sensible fallbacks.
    /// </summary>
    private KnowledgeSearchResult BuildResult(
        KnowledgeChunkRow row,
        string query,
        double score,
        bool includeDebug,
        IReadOnlyDictionary<string, string> debug)
    {
        var resultType = row.Kind switch
        {
            "api_operation" => "api",
            "doctor_check" => "doctor",
            _ => "docs"
        };
        var metadata = row.Metadata.RootElement;
        // Prefer the SQL-produced snippet; fall back to building one locally.
        var snippet = string.IsNullOrWhiteSpace(row.Snippet)
            ? KnowledgeSearchText.BuildSnippet(row.Body, query)
            : row.Snippet;
        var action = resultType switch
        {
            "api" => new KnowledgeOpenAction(
                KnowledgeOpenActionType.Api,
                Api: new KnowledgeOpenApiAction(
                    GetMetadataString(metadata, "service") ?? "unknown",
                    GetMetadataString(metadata, "method") ?? "GET",
                    GetMetadataString(metadata, "path") ?? "/",
                    GetMetadataString(metadata, "operationId") ?? row.Title)),
            "doctor" => new KnowledgeOpenAction(
                KnowledgeOpenActionType.Doctor,
                Doctor: new KnowledgeOpenDoctorAction(
                    GetMetadataString(metadata, "checkCode") ?? row.Title,
                    GetMetadataString(metadata, "severity") ?? "warn",
                    true,
                    GetMetadataString(metadata, "runCommand") ?? $"stella doctor run --check {row.Title}")),
            _ => new KnowledgeOpenAction(
                KnowledgeOpenActionType.Docs,
                Docs: new KnowledgeOpenDocAction(
                    GetMetadataString(metadata, "path") ?? string.Empty,
                    row.Anchor ?? GetMetadataString(metadata, "anchor") ?? "overview",
                    row.SpanStart,
                    row.SpanEnd))
        };
        return new KnowledgeSearchResult(
            resultType,
            row.Title,
            snippet,
            score,
            action,
            includeDebug ? debug : null);
    }

    /// <summary>
    /// Encodes the query, pads or truncates the raw embedding to the configured
    /// VectorDimensions, then L2-normalizes it. Returns the raw (empty) array
    /// when the encoder produced nothing.
    /// </summary>
    private float[] EncodeQueryEmbedding(string query)
    {
        var raw = _vectorEncoder.Encode(query);
        if (raw.Length == 0)
        {
            return raw;
        }
        var dimensions = Math.Max(1, _options.VectorDimensions);
        var normalized = new float[dimensions];
        var copy = Math.Min(raw.Length, dimensions);
        Array.Copy(raw, normalized, copy);
        var norm = 0d;
        for (var index = 0; index < normalized.Length; index++)
        {
            norm += normalized[index] * normalized[index];
        }
        if (norm <= 0d)
        {
            // All-zero vector: nothing to normalize.
            return normalized;
        }
        var magnitude = Math.Sqrt(norm);
        for (var index = 0; index < normalized.Length; index++)
        {
            normalized[index] = (float)(normalized[index] / magnitude);
        }
        return normalized;
    }

    // Reads a string property from chunk metadata; null when absent or not a JSON string.
    private static string? GetMetadataString(JsonElement metadata, string propertyName)
    {
        if (metadata.ValueKind != JsonValueKind.Object || !metadata.TryGetProperty(propertyName, out var value) || value.ValueKind != JsonValueKind.String)
        {
            return null;
        }
        return value.GetString();
    }

    // Reads a string-array property from chunk metadata, keeping only non-blank trimmed entries.
    private static IReadOnlyList<string> GetMetadataStringArray(JsonElement metadata, string propertyName)
    {
        if (metadata.ValueKind != JsonValueKind.Object || !metadata.TryGetProperty(propertyName, out var value) || value.ValueKind != JsonValueKind.Array)
        {
            return [];
        }
        return value.EnumerateArray()
            .Where(static item => item.ValueKind == JsonValueKind.String)
            .Select(static item => item.GetString())
            .Where(static item => !string.IsNullOrWhiteSpace(item))
            .Select(static item => item!.Trim())
            .ToImmutableArray();
    }

    // Falls back to DefaultTopK when unspecified; explicit requests are clamped to [1, 100].
    // NOTE(review): DefaultTopK may be configured above 100 (Range allows up to 384) and is
    // not clamped here — confirm whether that asymmetry is intended.
    private int ResolveTopK(int? requested)
    {
        var fallback = Math.Max(1, _options.DefaultTopK);
        if (!requested.HasValue)
        {
            return fallback;
        }
        return Math.Clamp(requested.Value, 1, 100);
    }

    // Collapses whitespace and truncates the query to MaxQueryLength characters.
    private string NormalizeQuery(string query)
    {
        var normalized = KnowledgeSearchText.NormalizeWhitespace(query);
        if (normalized.Length <= _options.MaxQueryLength)
        {
            return normalized;
        }
        return normalized[.._options.MaxQueryLength].TrimEnd();
    }

    // Standard RRF contribution: 1 / (60 + rank); 0 for non-positive ranks.
    private static double ReciprocalRank(int rank)
    {
        if (rank <= 0)
        {
            return 0d;
        }
        return 1d / (ReciprocalRankConstant + rank);
    }
}

View File

@@ -0,0 +1,28 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// Dependency-injection registration for the AdvisoryAI knowledge-search feature.
/// </summary>
public static class KnowledgeSearchServiceCollectionExtensions
{
    /// <summary>
    /// Binds <see cref="KnowledgeSearchOptions"/> from the
    /// "AdvisoryAI:KnowledgeSearch" section (with data-annotation validation)
    /// and registers the Postgres store, indexer, search service, and benchmark
    /// components as singletons. TryAdd* preserves any registrations the caller
    /// supplied earlier.
    /// </summary>
    public static IServiceCollection AddAdvisoryKnowledgeSearch(
        this IServiceCollection services,
        IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);
        services.AddOptions<KnowledgeSearchOptions>()
            .Bind(configuration.GetSection(KnowledgeSearchOptions.SectionName))
            .ValidateDataAnnotations();
        services.TryAddSingleton<IKnowledgeSearchStore, PostgresKnowledgeSearchStore>();
        services.TryAddSingleton<IKnowledgeIndexer, KnowledgeIndexer>();
        services.TryAddSingleton<IKnowledgeSearchService, KnowledgeSearchService>();
        services.TryAddSingleton<IKnowledgeSearchBenchmarkDatasetGenerator, KnowledgeSearchBenchmarkDatasetGenerator>();
        services.TryAddSingleton<IKnowledgeSearchBenchmarkRunner, KnowledgeSearchBenchmarkRunner>();
        return services;
    }
}

View File

@@ -0,0 +1,112 @@
using System.Globalization;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
/// <summary>
/// Text utilities shared across the knowledge-search pipeline: whitespace
/// normalization, slug and stable-id generation, snippet building with
/// &lt;mark&gt; highlighting, and cosine similarity for embeddings.
/// </summary>
internal static class KnowledgeSearchText
{
    // Collapses any run of characters that are not lowercase ASCII letters/digits into a hyphen.
    private static readonly Regex SlugRegex = new("[^a-z0-9]+", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // Collapses any run of whitespace (including newlines) into a single space.
    private static readonly Regex MultiSpaceRegex = new("\\s+", RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Normalizes line endings to '\n', then collapses all whitespace runs to
    /// single spaces and trims. Returns an empty string for null/blank input.
    /// </summary>
    public static string NormalizeWhitespace(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return string.Empty;
        }
        var normalized = value.Replace("\r\n", "\n", StringComparison.Ordinal)
            .Replace('\r', '\n')
            .Trim();
        return MultiSpaceRegex.Replace(normalized, " ").Trim();
    }

    /// <summary>
    /// Produces a lowercase hyphen-separated slug; falls back to "section"
    /// when the input contains no slug-safe characters.
    /// </summary>
    public static string Slugify(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return "section";
        }
        var lower = value.Trim().ToLowerInvariant();
        var slug = SlugRegex.Replace(lower, "-").Trim('-');
        return string.IsNullOrWhiteSpace(slug) ? "section" : slug;
    }

    /// <summary>
    /// Deterministic identifier: lowercase-hex SHA-256 over the
    /// whitespace-normalized components joined with '|'.
    /// </summary>
    public static string StableId(params string[] components)
    {
        var normalized = string.Join("|", components.Select(NormalizeWhitespace));
        var bytes = Encoding.UTF8.GetBytes(normalized);
        var hash = SHA256.HashData(bytes);
        return Convert.ToHexStringLower(hash);
    }

    /// <summary>
    /// Builds a display snippet around the first occurrence of
    /// <paramref name="query"/> in <paramref name="body"/>, wrapping matches in
    /// &lt;mark&gt; tags. The raw snippet is trimmed to its length budget
    /// BEFORE highlighting, so truncation can never split a &lt;mark&gt; tag
    /// and emit broken markup (previously the trim ran after highlighting).
    /// </summary>
    public static string BuildSnippet(string body, string query)
    {
        var normalizedBody = NormalizeWhitespace(body);
        if (normalizedBody.Length == 0)
        {
            return string.Empty;
        }
        var term = NormalizeWhitespace(query).ToLowerInvariant();
        if (term.Length == 0)
        {
            return TrimSnippet(normalizedBody, 240);
        }
        var index = normalizedBody.IndexOf(term, StringComparison.OrdinalIgnoreCase);
        if (index < 0)
        {
            return TrimSnippet(normalizedBody, 240);
        }
        const int around = 96;
        var start = Math.Max(index - around, 0);
        var length = Math.Min(around * 2 + term.Length, normalizedBody.Length - start);
        // Trim the raw excerpt first so the markup added below stays intact.
        var snippet = TrimSnippet(normalizedBody.Substring(start, length), 320);
        return Regex.Replace(
            snippet,
            Regex.Escape(term),
            static match => $"<mark>{match.Value}</mark>",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
    }

    /// <summary>
    /// Cosine similarity of two equal-length vectors; returns 0 for empty,
    /// mismatched-length, or zero-norm inputs.
    /// </summary>
    public static double CosineSimilarity(float[] left, float[] right)
    {
        if (left.Length == 0 || right.Length == 0 || left.Length != right.Length)
        {
            return 0d;
        }
        double dot = 0d;
        double leftNorm = 0d;
        double rightNorm = 0d;
        for (var i = 0; i < left.Length; i++)
        {
            dot += left[i] * right[i];
            leftNorm += left[i] * left[i];
            rightNorm += right[i] * right[i];
        }
        if (leftNorm <= 0d || rightNorm <= 0d)
        {
            return 0d;
        }
        return dot / Math.Sqrt(leftNorm * rightNorm);
    }

    // Hard-truncates to maxChars and appends "..." when anything was cut.
    private static string TrimSnippet(string value, int maxChars)
    {
        if (value.Length <= maxChars)
        {
            return value;
        }
        return value[..maxChars].TrimEnd() + "...";
    }
}

View File

@@ -0,0 +1,956 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Npgsql;
using NpgsqlTypes;
using System.Globalization;
using System.Text.Json;
namespace StellaOps.AdvisoryAI.KnowledgeSearch;
internal sealed class PostgresKnowledgeSearchStore : IKnowledgeSearchStore, IAsyncDisposable
{
// Shared empty JSON object; presumably used as a metadata fallback later in the
// file (e.g. by ParseJsonDocument) — not referenced in the visible portion.
private static readonly JsonDocument EmptyJsonDocument = JsonDocument.Parse("{}");

private readonly KnowledgeSearchOptions _options;
private readonly ILogger<PostgresKnowledgeSearchStore> _logger;

// Lazily-created data source; null when the store is not configured.
private readonly Lazy<NpgsqlDataSource?> _dataSource;

// Cached result of the embedding_vec column probe; null = not probed yet.
private bool? _hasEmbeddingVectorColumn;

public PostgresKnowledgeSearchStore(
    IOptions<KnowledgeSearchOptions> options,
    ILogger<PostgresKnowledgeSearchStore> logger)
{
    ArgumentNullException.ThrowIfNull(options);
    _options = options.Value ?? new KnowledgeSearchOptions();
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    _dataSource = new Lazy<NpgsqlDataSource?>(CreateDataSource, isThreadSafe: true);
}
/// <summary>
/// Ensures the advisoryai schema exists, creates the migration-history table,
/// and applies any not-yet-applied migration scripts — all inside a single
/// transaction. No-ops when the store is not configured.
/// </summary>
public async Task EnsureSchemaAsync(CancellationToken cancellationToken)
{
    if (!IsConfigured())
    {
        _logger.LogDebug("Knowledge search schema ensure skipped because configuration is incomplete.");
        return;
    }
    await using var connection = await GetDataSource().OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
    const string createSchemaSql = "CREATE SCHEMA IF NOT EXISTS advisoryai;";
    await ExecuteNonQueryAsync(connection, transaction, createSchemaSql, cancellationToken).ConfigureAwait(false);
    const string createHistorySql = """
    CREATE TABLE IF NOT EXISTS advisoryai.__migration_history
    (
        migration_name TEXT PRIMARY KEY,
        applied_at TIMESTAMPTZ NOT NULL
    );
    """;
    await ExecuteNonQueryAsync(connection, transaction, createHistorySql, cancellationToken).ConfigureAwait(false);
    var scripts = LoadMigrationScripts();
    foreach (var script in scripts)
    {
        // Idempotent: skip scripts already recorded in the history table.
        if (await IsMigrationAppliedAsync(connection, transaction, script.Name, cancellationToken).ConfigureAwait(false))
        {
            continue;
        }
        _logger.LogInformation("Applying AdvisoryAI knowledge migration {Migration}.", script.Name);
        await ExecuteNonQueryAsync(connection, transaction, script.Sql, cancellationToken).ConfigureAwait(false);
        const string insertHistorySql = """
        INSERT INTO advisoryai.__migration_history (migration_name, applied_at)
        VALUES (@migration_name, NOW())
        ON CONFLICT (migration_name) DO NOTHING;
        """;
        await using var history = connection.CreateCommand();
        history.Transaction = transaction;
        history.CommandText = insertHistorySql;
        history.Parameters.AddWithValue("migration_name", script.Name);
        history.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(30));
        await history.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
    await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
    // Migrations may have added the embedding_vec column; force re-detection.
    _hasEmbeddingVectorColumn = null;
}
/// <summary>
/// Atomically replaces the entire knowledge index with the supplied snapshot:
/// ensures schema, deletes existing docs, and re-inserts documents, chunks,
/// API specs/operations, and doctor projections in one transaction.
/// </summary>
public async Task ReplaceIndexAsync(KnowledgeIndexSnapshot snapshot, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(snapshot);
    if (!IsConfigured())
    {
        _logger.LogDebug("Knowledge search replace-index skipped because configuration is incomplete.");
        return;
    }
    await EnsureSchemaAsync(cancellationToken).ConfigureAwait(false);
    await using var connection = await GetDataSource().OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
    await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false);
    // Only kb_doc is deleted here — presumably child tables (kb_chunk, etc.)
    // are cleared via ON DELETE CASCADE foreign keys; confirm against the
    // migration scripts.
    const string truncateSql = "DELETE FROM advisoryai.kb_doc;";
    await ExecuteNonQueryAsync(connection, transaction, truncateSql, cancellationToken).ConfigureAwait(false);
    var hasEmbeddingVectorColumn = await HasEmbeddingVectorColumnAsync(connection, transaction, cancellationToken)
        .ConfigureAwait(false);
    await InsertDocumentsAsync(connection, transaction, snapshot.Documents, cancellationToken).ConfigureAwait(false);
    await InsertChunksAsync(connection, transaction, snapshot.Chunks, hasEmbeddingVectorColumn, cancellationToken)
        .ConfigureAwait(false);
    await InsertApiSpecsAsync(connection, transaction, snapshot.ApiSpecs, cancellationToken).ConfigureAwait(false);
    await InsertApiOperationsAsync(connection, transaction, snapshot.ApiOperations, cancellationToken).ConfigureAwait(false);
    await InsertDoctorProjectionsAsync(connection, transaction, snapshot.DoctorProjections, cancellationToken)
        .ConfigureAwait(false);
    await transaction.CommitAsync(cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Full-text stage: ranks chunks against a websearch-style tsquery using
/// ts_rank_cd, produces ts_headline snippets (falling back to a 320-char body
/// prefix), and applies optional kind/tag/product/version/service filters.
/// Returns an empty list when unconfigured, the query is blank, or take &lt;= 0.
/// </summary>
public async Task<IReadOnlyList<KnowledgeChunkRow>> SearchFtsAsync(
    string query,
    KnowledgeSearchFilter? filters,
    int take,
    TimeSpan timeout,
    CancellationToken cancellationToken)
{
    if (!IsConfigured() || string.IsNullOrWhiteSpace(query) || take <= 0)
    {
        return [];
    }
    var kinds = ResolveKinds(filters);
    var tags = ResolveTags(filters);
    var normalizedProduct = NormalizeOptional(filters?.Product);
    var normalizedVersion = NormalizeOptional(filters?.Version);
    var normalizedService = NormalizeOptional(filters?.Service);
    // Empty-string / zero-count parameters disable the corresponding filter in SQL.
    const string sql = """
    WITH q AS (
        SELECT websearch_to_tsquery('simple', @query) AS tsq
    )
    SELECT
        c.chunk_id,
        c.doc_id,
        c.kind,
        c.anchor,
        c.section_path,
        c.span_start,
        c.span_end,
        c.title,
        c.body,
        COALESCE(
            NULLIF(ts_headline(
                'simple',
                c.body,
                q.tsq,
                'StartSel=<mark>, StopSel=</mark>, MaxFragments=2, MinWords=8, MaxWords=26, FragmentDelimiter= ... '
            ), ''),
            substring(c.body from 1 for 320)
        ) AS snippet,
        c.metadata,
        ts_rank_cd(c.body_tsv, q.tsq, 32) AS lexical_score,
        c.embedding
    FROM advisoryai.kb_chunk AS c
    INNER JOIN advisoryai.kb_doc AS d
        ON d.doc_id = c.doc_id
    CROSS JOIN q
    WHERE c.body_tsv @@ q.tsq
      AND (@kind_count = 0 OR c.kind = ANY(@kinds))
      AND (@tag_count = 0 OR EXISTS (
            SELECT 1
            FROM jsonb_array_elements_text(COALESCE(c.metadata->'tags', '[]'::jsonb)) AS tag(value)
            WHERE lower(tag.value) = ANY(@tags)
      ))
      AND (@product = '' OR lower(d.product) = lower(@product))
      AND (@version = '' OR lower(d.version) = lower(@version))
      AND (@service = '' OR lower(COALESCE(c.metadata->>'service', '')) = lower(@service))
    ORDER BY lexical_score DESC, c.chunk_id ASC
    LIMIT @take;
    """;
    await using var command = CreateCommand(sql, timeout);
    command.Parameters.AddWithValue("query", query);
    command.Parameters.AddWithValue("take", take);
    command.Parameters.AddWithValue("kind_count", kinds.Length);
    command.Parameters.AddWithValue(
        "kinds",
        NpgsqlDbType.Array | NpgsqlDbType.Text,
        kinds.Length == 0 ? Array.Empty<string>() : kinds);
    command.Parameters.AddWithValue("tag_count", tags.Length);
    command.Parameters.AddWithValue(
        "tags",
        NpgsqlDbType.Array | NpgsqlDbType.Text,
        tags.Length == 0 ? Array.Empty<string>() : tags);
    command.Parameters.AddWithValue("product", normalizedProduct);
    command.Parameters.AddWithValue("version", normalizedVersion);
    command.Parameters.AddWithValue("service", normalizedService);
    return await ReadChunkRowsAsync(command, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Vector stage candidate load. When the pgvector embedding_vec column exists,
/// rows are ordered by cosine distance (&lt;=&gt;) to the query vector in SQL;
/// otherwise the fallback query simply returns embedded rows ordered by
/// chunk_id, and the caller (KnowledgeSearchService) computes cosine
/// similarity in-process. Same filter semantics as SearchFtsAsync.
/// </summary>
public async Task<IReadOnlyList<KnowledgeChunkRow>> LoadVectorCandidatesAsync(
    float[] queryEmbedding,
    KnowledgeSearchFilter? filters,
    int take,
    TimeSpan timeout,
    CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(queryEmbedding);
    if (!IsConfigured() || queryEmbedding.Length == 0 || take <= 0)
    {
        return [];
    }
    var kinds = ResolveKinds(filters);
    var tags = ResolveTags(filters);
    var normalizedProduct = NormalizeOptional(filters?.Product);
    var normalizedVersion = NormalizeOptional(filters?.Version);
    var normalizedService = NormalizeOptional(filters?.Service);
    var queryVectorLiteral = BuildVectorLiteral(queryEmbedding);
    var useEmbeddingVectorColumn = await HasEmbeddingVectorColumnAsync(cancellationToken).ConfigureAwait(false);
    var sql = useEmbeddingVectorColumn
        ? """
        SELECT
            c.chunk_id,
            c.doc_id,
            c.kind,
            c.anchor,
            c.section_path,
            c.span_start,
            c.span_end,
            c.title,
            c.body,
            substring(c.body from 1 for 320) AS snippet,
            c.metadata,
            0::double precision AS lexical_score,
            c.embedding
        FROM advisoryai.kb_chunk AS c
        INNER JOIN advisoryai.kb_doc AS d
            ON d.doc_id = c.doc_id
        WHERE c.embedding_vec IS NOT NULL
          AND (@kind_count = 0 OR c.kind = ANY(@kinds))
          AND (@tag_count = 0 OR EXISTS (
                SELECT 1
                FROM jsonb_array_elements_text(COALESCE(c.metadata->'tags', '[]'::jsonb)) AS tag(value)
                WHERE lower(tag.value) = ANY(@tags)
          ))
          AND (@product = '' OR lower(d.product) = lower(@product))
          AND (@version = '' OR lower(d.version) = lower(@version))
          AND (@service = '' OR lower(COALESCE(c.metadata->>'service', '')) = lower(@service))
        ORDER BY c.embedding_vec <=> CAST(@query_vector AS vector), c.chunk_id ASC
        LIMIT @take;
        """
        : """
        SELECT
            c.chunk_id,
            c.doc_id,
            c.kind,
            c.anchor,
            c.section_path,
            c.span_start,
            c.span_end,
            c.title,
            c.body,
            substring(c.body from 1 for 320) AS snippet,
            c.metadata,
            0::double precision AS lexical_score,
            c.embedding
        FROM advisoryai.kb_chunk AS c
        INNER JOIN advisoryai.kb_doc AS d
            ON d.doc_id = c.doc_id
        WHERE c.embedding IS NOT NULL
          AND (@kind_count = 0 OR c.kind = ANY(@kinds))
          AND (@tag_count = 0 OR EXISTS (
                SELECT 1
                FROM jsonb_array_elements_text(COALESCE(c.metadata->'tags', '[]'::jsonb)) AS tag(value)
                WHERE lower(tag.value) = ANY(@tags)
          ))
          AND (@product = '' OR lower(d.product) = lower(@product))
          AND (@version = '' OR lower(d.version) = lower(@version))
          AND (@service = '' OR lower(COALESCE(c.metadata->>'service', '')) = lower(@service))
        ORDER BY c.chunk_id ASC
        LIMIT @take;
        """;
    await using var command = CreateCommand(sql, timeout);
    command.Parameters.AddWithValue("take", take);
    command.Parameters.AddWithValue("kind_count", kinds.Length);
    command.Parameters.AddWithValue(
        "kinds",
        NpgsqlDbType.Array | NpgsqlDbType.Text,
        kinds.Length == 0 ? Array.Empty<string>() : kinds);
    command.Parameters.AddWithValue("tag_count", tags.Length);
    command.Parameters.AddWithValue(
        "tags",
        NpgsqlDbType.Array | NpgsqlDbType.Text,
        tags.Length == 0 ? Array.Empty<string>() : tags);
    command.Parameters.AddWithValue("product", normalizedProduct);
    command.Parameters.AddWithValue("version", normalizedVersion);
    command.Parameters.AddWithValue("service", normalizedService);
    // NOTE(review): @query_vector is bound unconditionally but the fallback SQL
    // does not reference it — confirm Npgsql tolerates the unused parameter on
    // that path.
    command.Parameters.AddWithValue("query_vector", queryVectorLiteral);
    return await ReadChunkRowsAsync(command, cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Disposes the lazily-created Npgsql data source, if one was ever materialized.
/// </summary>
public async ValueTask DisposeAsync()
{
    // Nothing to release unless the lazy factory actually ran.
    if (!_dataSource.IsValueCreated)
    {
        return;
    }

    var source = _dataSource.Value;
    if (source is not null)
    {
        await source.DisposeAsync().ConfigureAwait(false);
    }
}
/// <summary>
/// Renders an embedding as a pgvector text literal, e.g. "[1,0.5,2]", using
/// round-trip-safe "G9" formatting with the invariant culture.
/// </summary>
private static string BuildVectorLiteral(float[] values)
{
    var rendered = Array.ConvertAll(values, static value => value.ToString("G9", CultureInfo.InvariantCulture));
    return $"[{string.Join(",", rendered)}]";
}
/// <summary>
/// Maps an optional filter value to its SQL form: trimmed text, or the empty
/// string (which disables the corresponding SQL filter) for null/blank input.
/// </summary>
private static string NormalizeOptional(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return string.Empty;
    }

    return value.Trim();
}
/// <summary>
/// Maps public filter type names ("docs"/"api"/"doctor", case-insensitive) to
/// stored chunk kinds ("md_section"/"api_operation"/"doctor_check"),
/// de-duplicated and sorted ordinally. Unknown or blank entries are dropped.
/// </summary>
private static string[] ResolveKinds(KnowledgeSearchFilter? filters)
{
    if (filters?.Type is not { Count: > 0 } types)
    {
        return [];
    }

    // Translate one public type name to its stored kind; null when unrecognized.
    static string? MapKind(string value)
    {
        if (value.Equals("docs", StringComparison.OrdinalIgnoreCase))
        {
            return "md_section";
        }

        if (value.Equals("api", StringComparison.OrdinalIgnoreCase))
        {
            return "api_operation";
        }

        return value.Equals("doctor", StringComparison.OrdinalIgnoreCase) ? "doctor_check" : null;
    }

    var kinds = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    foreach (var item in types)
    {
        if (!string.IsNullOrWhiteSpace(item) && MapKind(item) is { } kind)
        {
            kinds.Add(kind);
        }
    }

    return kinds.OrderBy(static value => value, StringComparer.Ordinal).ToArray();
}
/// <summary>
/// Normalizes filter tags for SQL matching: trims, lowercases (invariant),
/// drops blanks, de-duplicates, and returns them ordinally sorted.
/// </summary>
private static string[] ResolveTags(KnowledgeSearchFilter? filters)
{
    var tags = filters?.Tags;
    if (tags is null || tags.Count == 0)
    {
        return [];
    }

    // SortedSet with an ordinal comparer gives dedupe + ordering in one pass.
    var normalized = new SortedSet<string>(StringComparer.Ordinal);
    foreach (var tag in tags)
    {
        if (!string.IsNullOrWhiteSpace(tag))
        {
            normalized.Add(tag.Trim().ToLowerInvariant());
        }
    }

    return normalized.ToArray();
}
/// <summary>
/// Materializes KnowledgeChunkRow instances from a command whose SELECT list
/// matches the fixed ordinal layout used by SearchFtsAsync and
/// LoadVectorCandidatesAsync: 0 chunk_id, 1 doc_id, 2 kind, 3 anchor,
/// 4 section_path, 5 span_start, 6 span_end, 7 title, 8 body, 9 snippet,
/// 10 metadata, 11 lexical_score, 12 embedding.
/// </summary>
private async Task<IReadOnlyList<KnowledgeChunkRow>> ReadChunkRowsAsync(
    NpgsqlCommand command,
    CancellationToken cancellationToken)
{
    var rows = new List<KnowledgeChunkRow>();
    await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
    while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
    {
        var metadataJson = reader.IsDBNull(10) ? "{}" : reader.GetString(10);
        var metadata = ParseJsonDocument(metadataJson);
        float[]? embedding = null;
        if (!reader.IsDBNull(12))
        {
            // Npgsql may surface the array as float[] directly or as a generic
            // Array depending on the column's provider type; handle both.
            if (reader.GetValue(12) is float[] values)
            {
                embedding = values;
            }
            else if (reader.GetValue(12) is Array array)
            {
                embedding = array.Cast<object?>().Select(static item => Convert.ToSingle(item, CultureInfo.InvariantCulture)).ToArray();
            }
        }
        rows.Add(new KnowledgeChunkRow(
            reader.GetString(0),
            reader.GetString(1),
            reader.GetString(2),
            reader.IsDBNull(3) ? null : reader.GetString(3),
            reader.IsDBNull(4) ? null : reader.GetString(4),
            reader.GetInt32(5),
            reader.GetInt32(6),
            reader.GetString(7),
            reader.GetString(8),
            reader.IsDBNull(9) ? string.Empty : reader.GetString(9),
            metadata,
            embedding,
            reader.IsDBNull(11) ? 0d : reader.GetDouble(11)));
    }
    return rows;
}
/// <summary>
/// Cached probe for the pgvector embedding_vec column. The cache field is not
/// synchronized; worst case under concurrency is a duplicate probe query,
/// which is benign since the result is stable between migrations.
/// </summary>
private async Task<bool> HasEmbeddingVectorColumnAsync(CancellationToken cancellationToken)
{
    if (_hasEmbeddingVectorColumn.HasValue)
    {
        return _hasEmbeddingVectorColumn.Value;
    }
    await using var connection = await GetDataSource().OpenConnectionAsync(cancellationToken).ConfigureAwait(false);
    _hasEmbeddingVectorColumn = await HasEmbeddingVectorColumnAsync(connection, null, cancellationToken).ConfigureAwait(false);
    return _hasEmbeddingVectorColumn.Value;
}
/// <summary>
/// Queries information_schema to determine whether
/// advisoryai.kb_chunk.embedding_vec exists (i.e. the pgvector migration has
/// been applied on this database).
/// </summary>
private static async Task<bool> HasEmbeddingVectorColumnAsync(
    NpgsqlConnection connection,
    NpgsqlTransaction? transaction,
    CancellationToken cancellationToken)
{
    const string sql = """
    SELECT EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'advisoryai'
          AND table_name = 'kb_chunk'
          AND column_name = 'embedding_vec'
    );
    """;
    await using var command = connection.CreateCommand();
    command.CommandText = sql;
    command.Transaction = transaction;
    command.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(30));
    var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
    return result is bool value && value;
}
/// <summary>
/// Inserts source documents into advisoryai.kb_doc within the caller's
/// transaction, reusing a single prepared command and re-binding parameters
/// per row. Metadata is stored as jsonb; indexed_at is stamped with NOW().
/// </summary>
private async Task InsertDocumentsAsync(
    NpgsqlConnection connection,
    NpgsqlTransaction transaction,
    IReadOnlyList<KnowledgeSourceDocument> documents,
    CancellationToken cancellationToken)
{
    if (documents.Count == 0)
    {
        return;
    }
    const string sql = """
    INSERT INTO advisoryai.kb_doc
    (
        doc_id,
        doc_type,
        product,
        version,
        source_ref,
        path,
        title,
        content_hash,
        metadata,
        indexed_at
    )
    VALUES
    (
        @doc_id,
        @doc_type,
        @product,
        @version,
        @source_ref,
        @path,
        @title,
        @content_hash,
        @metadata::jsonb,
        NOW()
    );
    """;
    await using var command = connection.CreateCommand();
    command.Transaction = transaction;
    command.CommandText = sql;
    command.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(60));
    foreach (var document in documents)
    {
        // Same command instance per row; parameters are cleared and re-added.
        command.Parameters.Clear();
        command.Parameters.AddWithValue("doc_id", document.DocId);
        command.Parameters.AddWithValue("doc_type", document.DocType);
        command.Parameters.AddWithValue("product", document.Product);
        command.Parameters.AddWithValue("version", document.Version);
        command.Parameters.AddWithValue("source_ref", document.SourceRef);
        command.Parameters.AddWithValue("path", document.Path);
        command.Parameters.AddWithValue("title", document.Title);
        command.Parameters.AddWithValue("content_hash", document.ContentHash);
        command.Parameters.AddWithValue("metadata", NpgsqlDbType.Jsonb, document.Metadata.RootElement.GetRawText());
        await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
}
/// <summary>
/// Inserts knowledge chunks into <c>advisoryai.kb_chunk</c> within the rebuild
/// transaction. The weighted full-text vector <c>body_tsv</c> is computed in SQL
/// (title weight A, section path weight B, body weight D). When the pgvector-backed
/// <c>embedding_vec</c> column exists, the embedding is additionally stored as a
/// <c>vector</c> value; otherwise only the portable REAL[] column is written.
/// </summary>
/// <param name="connection">Open PostgreSQL connection.</param>
/// <param name="transaction">Transaction the inserts participate in.</param>
/// <param name="chunks">Chunks to persist; an empty list is a no-op.</param>
/// <param name="hasEmbeddingVectorColumn">True when advisoryai.kb_chunk exposes embedding_vec.</param>
/// <param name="cancellationToken">Observed by each insert round-trip.</param>
private async Task InsertChunksAsync(
NpgsqlConnection connection,
NpgsqlTransaction transaction,
IReadOnlyList<KnowledgeChunkDocument> chunks,
bool hasEmbeddingVectorColumn,
CancellationToken cancellationToken)
{
if (chunks.Count == 0)
{
return;
}
// Two SQL variants: identical except the pgvector variant also casts the
// textual @embedding_vector literal into the embedding_vec column.
var sql = hasEmbeddingVectorColumn
? """
INSERT INTO advisoryai.kb_chunk
(
chunk_id,
doc_id,
kind,
anchor,
section_path,
span_start,
span_end,
title,
body,
body_tsv,
embedding,
embedding_vec,
metadata,
indexed_at
)
VALUES
(
@chunk_id,
@doc_id,
@kind,
@anchor,
@section_path,
@span_start,
@span_end,
@title,
@body,
setweight(to_tsvector('simple', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('simple', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('simple', coalesce(@body, '')), 'D'),
@embedding,
CAST(@embedding_vector AS vector),
@metadata::jsonb,
NOW()
);
"""
: """
INSERT INTO advisoryai.kb_chunk
(
chunk_id,
doc_id,
kind,
anchor,
section_path,
span_start,
span_end,
title,
body,
body_tsv,
embedding,
metadata,
indexed_at
)
VALUES
(
@chunk_id,
@doc_id,
@kind,
@anchor,
@section_path,
@span_start,
@span_end,
@title,
@body,
setweight(to_tsvector('simple', coalesce(@title, '')), 'A') ||
setweight(to_tsvector('simple', coalesce(@section_path, '')), 'B') ||
setweight(to_tsvector('simple', coalesce(@body, '')), 'D'),
@embedding,
@metadata::jsonb,
NOW()
);
""";
// A single command is reused; parameters are rebound for every chunk.
await using var command = connection.CreateCommand();
command.Transaction = transaction;
command.CommandText = sql;
command.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(120));
foreach (var chunk in chunks)
{
var embedding = chunk.Embedding;
command.Parameters.Clear();
command.Parameters.AddWithValue("chunk_id", chunk.ChunkId);
command.Parameters.AddWithValue("doc_id", chunk.DocId);
command.Parameters.AddWithValue("kind", chunk.Kind);
command.Parameters.AddWithValue("anchor", (object?)chunk.Anchor ?? DBNull.Value);
command.Parameters.AddWithValue("section_path", (object?)chunk.SectionPath ?? DBNull.Value);
command.Parameters.AddWithValue("span_start", chunk.SpanStart);
command.Parameters.AddWithValue("span_end", chunk.SpanEnd);
command.Parameters.AddWithValue("title", chunk.Title);
command.Parameters.AddWithValue("body", chunk.Body);
// A missing embedding is stored as an empty REAL[] rather than SQL NULL.
command.Parameters.AddWithValue(
"embedding",
NpgsqlDbType.Array | NpgsqlDbType.Real,
embedding is null ? Array.Empty<float>() : embedding);
command.Parameters.AddWithValue("metadata", NpgsqlDbType.Jsonb, chunk.Metadata.RootElement.GetRawText());
if (hasEmbeddingVectorColumn)
{
// The vector column, by contrast, receives SQL NULL when no embedding exists;
// otherwise a text literal that the SQL CASTs to vector.
var vectorLiteral = embedding is null ? (object)DBNull.Value : BuildVectorLiteral(embedding);
command.Parameters.AddWithValue("embedding_vector", vectorLiteral);
}
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
}
/// <summary>
/// Persists OpenAPI specification rows into <c>advisoryai.api_spec</c> within the
/// rebuild transaction. Optional spec fields are written as SQL NULL.
/// </summary>
/// <param name="connection">Open PostgreSQL connection.</param>
/// <param name="transaction">Transaction the inserts participate in.</param>
/// <param name="specs">Specifications to persist; an empty list is a no-op.</param>
/// <param name="cancellationToken">Observed by each insert round-trip.</param>
private static async Task InsertApiSpecsAsync(
    NpgsqlConnection connection,
    NpgsqlTransaction transaction,
    IReadOnlyList<KnowledgeApiSpec> specs,
    CancellationToken cancellationToken)
{
    if (specs.Count == 0)
    {
        return;
    }

    const string insertSql = """
        INSERT INTO advisoryai.api_spec
        (
        spec_id,
        doc_id,
        service,
        openapi_version,
        title,
        version,
        source_path,
        content_json,
        indexed_at
        )
        VALUES
        (
        @spec_id,
        @doc_id,
        @service,
        @openapi_version,
        @title,
        @version,
        @source_path,
        @content_json::jsonb,
        NOW()
        );
        """;

    // One command serves every row; only the parameter values change per spec.
    await using var insertCommand = connection.CreateCommand();
    insertCommand.CommandText = insertSql;
    insertCommand.Transaction = transaction;
    insertCommand.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(120));

    var parameters = insertCommand.Parameters;
    foreach (var spec in specs)
    {
        parameters.Clear();
        parameters.AddWithValue("spec_id", spec.SpecId);
        parameters.AddWithValue("doc_id", spec.DocId);
        parameters.AddWithValue("service", spec.Service);
        parameters.AddWithValue("openapi_version", spec.OpenApiVersion as object ?? DBNull.Value);
        parameters.AddWithValue("title", spec.Title as object ?? DBNull.Value);
        parameters.AddWithValue("version", spec.Version as object ?? DBNull.Value);
        parameters.AddWithValue("source_path", spec.SourcePath);
        parameters.AddWithValue("content_json", NpgsqlDbType.Jsonb, spec.Content.RootElement.GetRawText());
        await insertCommand.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
}
/// <summary>
/// Inserts per-endpoint API operation rows into <c>advisoryai.api_operation</c>,
/// each referencing its parent spec and the search chunk generated for it.
/// Request/response/security payloads are stored as <c>jsonb</c>.
/// </summary>
/// <param name="connection">Open PostgreSQL connection.</param>
/// <param name="transaction">Transaction the inserts participate in.</param>
/// <param name="operations">Operations to persist; an empty list is a no-op.</param>
/// <param name="cancellationToken">Observed by each insert round-trip.</param>
private static async Task InsertApiOperationsAsync(
NpgsqlConnection connection,
NpgsqlTransaction transaction,
IReadOnlyList<KnowledgeApiOperation> operations,
CancellationToken cancellationToken)
{
if (operations.Count == 0)
{
return;
}
const string sql = """
INSERT INTO advisoryai.api_operation
(
operation_key,
spec_id,
chunk_id,
service,
method,
path,
operation_id,
tags,
summary,
request_json,
responses_json,
security_json,
indexed_at
)
VALUES
(
@operation_key,
@spec_id,
@chunk_id,
@service,
@method,
@path,
@operation_id,
@tags,
@summary,
@request_json::jsonb,
@responses_json::jsonb,
@security_json::jsonb,
NOW()
);
""";
// A single command is reused; parameters are rebound for every operation.
await using var command = connection.CreateCommand();
command.Transaction = transaction;
command.CommandText = sql;
command.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(120));
foreach (var operation in operations)
{
command.Parameters.Clear();
command.Parameters.AddWithValue("operation_key", operation.OperationKey);
command.Parameters.AddWithValue("spec_id", operation.SpecId);
command.Parameters.AddWithValue("chunk_id", operation.ChunkId);
command.Parameters.AddWithValue("service", operation.Service);
command.Parameters.AddWithValue("method", operation.Method);
command.Parameters.AddWithValue("path", operation.Path);
command.Parameters.AddWithValue("operation_id", (object?)operation.OperationId ?? DBNull.Value);
// No tags are stored as an empty TEXT[] rather than SQL NULL.
command.Parameters.AddWithValue(
"tags",
NpgsqlDbType.Array | NpgsqlDbType.Text,
operation.Tags.Count == 0 ? Array.Empty<string>() : operation.Tags.ToArray());
command.Parameters.AddWithValue("summary", (object?)operation.Summary ?? DBNull.Value);
command.Parameters.AddWithValue("request_json", NpgsqlDbType.Jsonb, operation.RequestJson.RootElement.GetRawText());
command.Parameters.AddWithValue("responses_json", NpgsqlDbType.Jsonb, operation.ResponsesJson.RootElement.GetRawText());
command.Parameters.AddWithValue("security_json", NpgsqlDbType.Jsonb, operation.SecurityJson.RootElement.GetRawText());
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
}
/// <summary>
/// Inserts doctor search projections into <c>advisoryai.doctor_search_projection</c>,
/// one row per diagnostic check, each referencing its backing search chunk.
/// </summary>
/// <param name="connection">Open PostgreSQL connection.</param>
/// <param name="transaction">Transaction the inserts participate in.</param>
/// <param name="projections">Projections to persist; an empty list is a no-op.</param>
/// <param name="cancellationToken">Observed by each insert round-trip.</param>
private static async Task InsertDoctorProjectionsAsync(
NpgsqlConnection connection,
NpgsqlTransaction transaction,
IReadOnlyList<KnowledgeDoctorProjection> projections,
CancellationToken cancellationToken)
{
if (projections.Count == 0)
{
return;
}
const string sql = """
INSERT INTO advisoryai.doctor_search_projection
(
check_code,
chunk_id,
title,
severity,
remediation,
run_command,
symptoms,
references_json,
metadata,
indexed_at
)
VALUES
(
@check_code,
@chunk_id,
@title,
@severity,
@remediation,
@run_command,
@symptoms,
@references_json::jsonb,
@metadata::jsonb,
NOW()
);
""";
// A single command is reused; parameters are rebound for every projection.
await using var command = connection.CreateCommand();
command.Transaction = transaction;
command.CommandText = sql;
command.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(60));
foreach (var projection in projections)
{
command.Parameters.Clear();
command.Parameters.AddWithValue("check_code", projection.CheckCode);
command.Parameters.AddWithValue("chunk_id", projection.ChunkId);
command.Parameters.AddWithValue("title", projection.Title);
command.Parameters.AddWithValue("severity", projection.Severity);
command.Parameters.AddWithValue("remediation", projection.Remediation);
command.Parameters.AddWithValue("run_command", projection.RunCommand);
// No symptoms are stored as an empty TEXT[] rather than SQL NULL.
command.Parameters.AddWithValue(
"symptoms",
NpgsqlDbType.Array | NpgsqlDbType.Text,
projection.Symptoms.Count == 0 ? Array.Empty<string>() : projection.Symptoms.ToArray());
command.Parameters.AddWithValue("references_json", NpgsqlDbType.Jsonb, projection.ReferencesJson.RootElement.GetRawText());
command.Parameters.AddWithValue("metadata", NpgsqlDbType.Jsonb, projection.MetadataJson.RootElement.GetRawText());
await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
}
/// <summary>
/// Returns true when the named migration is already recorded in
/// <c>advisoryai.__migration_history</c>.
/// </summary>
private static async Task<bool> IsMigrationAppliedAsync(
    NpgsqlConnection connection,
    NpgsqlTransaction transaction,
    string migrationName,
    CancellationToken cancellationToken)
{
    const string existsSql = """
        SELECT EXISTS(
        SELECT 1
        FROM advisoryai.__migration_history
        WHERE migration_name = @migration_name
        );
        """;

    await using var command = connection.CreateCommand();
    command.CommandText = existsSql;
    command.Transaction = transaction;
    command.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(30));
    command.Parameters.AddWithValue("migration_name", migrationName);

    var scalar = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
    // EXISTS yields a boxed bool; anything else (e.g. null) counts as "not applied".
    return scalar is true;
}
/// <summary>
/// Executes an arbitrary SQL statement (no result set expected) with a fixed
/// 120-second timeout, optionally inside a transaction.
/// </summary>
private static async Task ExecuteNonQueryAsync(
    NpgsqlConnection connection,
    NpgsqlTransaction? transaction,
    string sql,
    CancellationToken cancellationToken)
{
    await using var command = connection.CreateCommand();
    command.CommandText = sql;
    command.Transaction = transaction;
    command.CommandTimeout = ToCommandTimeoutSeconds(TimeSpan.FromSeconds(120));
    // The affected-row count is irrelevant for DDL/maintenance statements.
    _ = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
}
/// <summary>
/// Converts a timeout into whole seconds suitable for <c>CommandTimeout</c>:
/// non-positive values fall back to 30 seconds; positive values are rounded up
/// and clamped to a minimum of one second.
/// </summary>
private static int ToCommandTimeoutSeconds(TimeSpan timeout)
    => timeout > TimeSpan.Zero
        ? Math.Max(1, (int)Math.Ceiling(timeout.TotalSeconds))
        : 30;
/// <summary>
/// Creates a command bound to the pooled data source with the requested timeout
/// applied. Throws if knowledge search is unconfigured (see GetDataSource).
/// </summary>
private NpgsqlCommand CreateCommand(string sql, TimeSpan timeout)
{
    var dataSource = GetDataSource();
    var command = dataSource.CreateCommand(sql);
    command.CommandTimeout = ToCommandTimeoutSeconds(timeout);
    return command;
}
/// <summary>
/// True when knowledge search is enabled and a connection string is present.
/// </summary>
private bool IsConfigured()
    => _options.Enabled && !string.IsNullOrWhiteSpace(_options.ConnectionString);
/// <summary>
/// Returns the lazily created data source, or throws when the feature is not
/// configured (disabled or missing connection string).
/// </summary>
private NpgsqlDataSource GetDataSource()
    => _dataSource.Value ?? throw new InvalidOperationException(
        "AdvisoryAI knowledge search is not configured. Set AdvisoryAI:KnowledgeSearch:ConnectionString.");
/// <summary>
/// Builds an NpgsqlDataSource from the configured connection string, or returns
/// null when knowledge search is disabled or unconfigured.
/// </summary>
private NpgsqlDataSource? CreateDataSource()
    => IsConfigured()
        ? new NpgsqlDataSourceBuilder(_options.ConnectionString).Build()
        : null;
/// <summary>
/// Loads the embedded SQL migration scripts (Storage/Migrations/*.sql) from this
/// assembly, ordered by file name so migrations apply deterministically.
/// Throws when no scripts are embedded, which indicates a packaging error.
/// </summary>
private static IReadOnlyList<(string Name, string Sql)> LoadMigrationScripts()
{
    const string marker = ".Storage.Migrations.";
    var assembly = typeof(PostgresKnowledgeSearchStore).Assembly;
    var scripts = new List<(string Name, string Sql)>();

    foreach (var resource in assembly.GetManifestResourceNames())
    {
        if (!resource.Contains(marker, StringComparison.Ordinal) ||
            !resource.EndsWith(".sql", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        using var stream = assembly.GetManifestResourceStream(resource);
        if (stream is null)
        {
            continue;
        }

        using var reader = new StreamReader(stream);
        var sql = reader.ReadToEnd();
        // Strip the namespace prefix so only the migration file name remains.
        var fileName = resource[(resource.LastIndexOf(marker, StringComparison.Ordinal) + marker.Length)..];
        if (!string.IsNullOrWhiteSpace(fileName) && !string.IsNullOrWhiteSpace(sql))
        {
            scripts.Add((fileName, sql));
        }
    }

    scripts.Sort(static (left, right) => StringComparer.Ordinal.Compare(left.Name, right.Name));

    if (scripts.Count == 0)
    {
        throw new InvalidOperationException(
            "No AdvisoryAI migration scripts were embedded. Ensure Storage/Migrations/*.sql are embedded resources.");
    }

    return scripts;
}
/// <summary>
/// Parses a JSON string into a <see cref="JsonDocument"/>, falling back to the
/// shared <c>EmptyJsonDocument</c> when the input is blank or malformed.
/// </summary>
/// <remarks>
/// NOTE(review): the fallback returns the shared <c>EmptyJsonDocument</c> instance;
/// callers presumably must not dispose the returned document, or the shared
/// instance would be invalidated for later calls — confirm the caller contract.
/// </remarks>
private static JsonDocument ParseJsonDocument(string json)
{
if (string.IsNullOrWhiteSpace(json))
{
return EmptyJsonDocument;
}
try
{
return JsonDocument.Parse(json);
}
catch (JsonException)
{
// Malformed metadata is tolerated: index with an empty payload instead of failing.
return EmptyJsonDocument;
}
}
}

View File

@@ -0,0 +1,170 @@
[
{
"checkCode": "check.core.disk.space",
"title": "Disk space availability",
"severity": "high",
"description": "Low disk space can block ingestion pipelines and worker execution.",
"remediation": "Free disk space and verify retention settings.",
"runCommand": "stella doctor run --check check.core.disk.space",
"symptoms": [
"no space left on device",
"disk full",
"write failure"
],
"tags": [
"doctor",
"storage",
"core"
],
"references": [
"docs/operations/devops/runbooks/deployment-upgrade.md"
]
},
{
"checkCode": "check.core.db.connectivity",
"title": "PostgreSQL connectivity",
"severity": "high",
"description": "Doctor failed to connect to PostgreSQL or connection health checks timed out.",
"remediation": "Validate credentials, network reachability, and TLS settings.",
"runCommand": "stella doctor run --check check.core.db.connectivity",
"symptoms": [
"database unavailable",
"connection refused",
"timeout expired"
],
"tags": [
"doctor",
"database",
"connectivity"
],
"references": [
"docs/INSTALL_GUIDE.md"
]
},
{
"checkCode": "check.security.oidc.readiness",
"title": "OIDC readiness",
"severity": "warn",
"description": "OIDC prerequisites are missing or identity issuer metadata is not reachable.",
"remediation": "Verify issuer URL, JWKS availability, and Authority client configuration.",
"runCommand": "stella doctor run --check check.security.oidc.readiness",
"symptoms": [
"oidc setup",
"invalid issuer",
"jwks fetch failed"
],
"tags": [
"doctor",
"security",
"oidc"
],
"references": [
"docs/modules/authority/architecture.md"
]
},
{
"checkCode": "check.router.gateway.routes",
"title": "Router route registration",
"severity": "warn",
"description": "Expected gateway routes were not registered or health probes failed.",
"remediation": "Inspect route tables and refresh router registration.",
"runCommand": "stella doctor run --check check.router.gateway.routes",
"symptoms": [
"route missing",
"404 on expected endpoint",
"gateway routing"
],
"tags": [
"doctor",
"router",
"gateway"
],
"references": [
"docs/modules/router/README.md"
]
},
{
"checkCode": "check.integrations.secrets.binding",
"title": "Integration secret binding",
"severity": "medium",
"description": "Integration connectors cannot resolve configured secrets.",
"remediation": "Validate secret provider configuration and rotate invalid credentials.",
"runCommand": "stella doctor run --check check.integrations.secrets.binding",
"symptoms": [
"secret missing",
"invalid credential",
"auth failed"
],
"tags": [
"doctor",
"integrations",
"secrets"
],
"references": [
"docs/modules/platform/architecture-overview.md"
]
},
{
"checkCode": "check.release.policy.gate",
"title": "Policy gate prerequisites",
"severity": "warn",
"description": "Release policy gate prerequisites are incomplete for the target environment.",
"remediation": "Review required approvals, policy bundle versions, and attestations.",
"runCommand": "stella doctor run --check check.release.policy.gate",
"symptoms": [
"policy gate failed",
"missing attestation",
"promotion blocked"
],
"tags": [
"doctor",
"release",
"policy"
],
"references": [
"docs/operations/upgrade-runbook.md"
]
},
{
"checkCode": "check.airgap.bundle.integrity",
"title": "Air-gap bundle integrity",
"severity": "high",
"description": "Offline bundle integrity validation failed.",
"remediation": "Rebuild the bundle and verify signatures and checksums before import.",
"runCommand": "stella doctor run --check check.airgap.bundle.integrity",
"symptoms": [
"checksum mismatch",
"signature invalid",
"offline import failed"
],
"tags": [
"doctor",
"airgap",
"integrity"
],
"references": [
"docs/operations/devops/runbooks/deployment-upgrade.md"
]
},
{
"checkCode": "check.telemetry.pipeline.delivery",
"title": "Telemetry delivery pipeline",
"severity": "medium",
"description": "Telemetry queue backlog is growing or delivery workers are stalled.",
"remediation": "Scale workers, inspect queue depth, and validate downstream availability.",
"runCommand": "stella doctor run --check check.telemetry.pipeline.delivery",
"symptoms": [
"telemetry lag",
"queue backlog",
"delivery timeout"
],
"tags": [
"doctor",
"telemetry",
"queue"
],
"references": [
"docs/modules/platform/architecture-overview.md"
]
}
]

View File

@@ -11,6 +11,14 @@
<InternalsVisibleTo Include="StellaOps.Bench.AdvisoryAI" />
<InternalsVisibleTo Include="StellaOps.AdvisoryAI.Tests" />
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Storage/Migrations/*.sql" />
</ItemGroup>
<ItemGroup>
<None Update="KnowledgeSearch/doctor-search-seed.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />

View File

@@ -0,0 +1,132 @@
-- AdvisoryAI Knowledge Search schema
-- Sprint: SPRINT_20260222_051_AdvisoryAI_knowledge_search_docs_api_doctor
-- All knowledge-search objects live in a dedicated schema.
CREATE SCHEMA IF NOT EXISTS advisoryai;
-- Best-effort pgvector install: the extension may be missing or the role may
-- lack privileges; in that case embeddings fall back to kb_chunk's REAL[] column.
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS vector;
EXCEPTION
WHEN OTHERS THEN
RAISE NOTICE 'pgvector extension is unavailable; AdvisoryAI Knowledge Search will use array embeddings.';
END
$$;
-- One row per indexed source document; metadata is free-form jsonb.
CREATE TABLE IF NOT EXISTS advisoryai.kb_doc
(
doc_id TEXT PRIMARY KEY,
doc_type TEXT NOT NULL,
product TEXT NOT NULL,
version TEXT NOT NULL,
source_ref TEXT NOT NULL,
path TEXT NOT NULL,
title TEXT NOT NULL,
content_hash TEXT NOT NULL,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- A document is uniquely identified by type, product, version, source, and path.
CREATE UNIQUE INDEX IF NOT EXISTS idx_kb_doc_unique_source
ON advisoryai.kb_doc (doc_type, product, version, source_ref, path);
-- Searchable chunks extracted from kb_doc rows. body_tsv holds the weighted
-- tsvector computed by the indexer at insert time; embedding is the portable
-- REAL[] store that works without pgvector.
CREATE TABLE IF NOT EXISTS advisoryai.kb_chunk
(
chunk_id TEXT PRIMARY KEY,
doc_id TEXT NOT NULL REFERENCES advisoryai.kb_doc (doc_id) ON DELETE CASCADE,
kind TEXT NOT NULL,
anchor TEXT,
section_path TEXT,
span_start INTEGER NOT NULL DEFAULT 0,
span_end INTEGER NOT NULL DEFAULT 0,
title TEXT NOT NULL,
body TEXT NOT NULL,
body_tsv TSVECTOR NOT NULL,
embedding REAL[],
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_kb_chunk_doc_id
ON advisoryai.kb_chunk (doc_id);
CREATE INDEX IF NOT EXISTS idx_kb_chunk_kind
ON advisoryai.kb_chunk (kind);
-- GIN index backing full-text search over the weighted tsvector.
CREATE INDEX IF NOT EXISTS idx_kb_chunk_body_tsv
ON advisoryai.kb_chunk USING GIN (body_tsv);
-- When pgvector is installed, add a 384-dimension vector column plus an HNSW
-- cosine index. The application probes for embedding_vec at runtime and falls
-- back to the REAL[] column when it is absent.
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN
IF NOT EXISTS (
SELECT 1
FROM information_schema.columns
WHERE table_schema = 'advisoryai'
AND table_name = 'kb_chunk'
AND column_name = 'embedding_vec')
THEN
ALTER TABLE advisoryai.kb_chunk
ADD COLUMN embedding_vec vector(384);
END IF;
CREATE INDEX IF NOT EXISTS idx_kb_chunk_embedding_vec_hnsw
ON advisoryai.kb_chunk USING hnsw (embedding_vec vector_cosine_ops);
END IF;
END
$$;
-- One row per indexed OpenAPI specification; full spec content stored as jsonb.
CREATE TABLE IF NOT EXISTS advisoryai.api_spec
(
spec_id TEXT PRIMARY KEY,
doc_id TEXT NOT NULL REFERENCES advisoryai.kb_doc (doc_id) ON DELETE CASCADE,
service TEXT NOT NULL,
openapi_version TEXT,
title TEXT,
version TEXT,
source_path TEXT NOT NULL,
content_json JSONB NOT NULL,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_api_spec_service
ON advisoryai.api_spec (service);
-- One row per API operation, linked to its spec and to the search chunk
-- generated for it; request/response/security payloads are jsonb.
CREATE TABLE IF NOT EXISTS advisoryai.api_operation
(
operation_key TEXT PRIMARY KEY,
spec_id TEXT NOT NULL REFERENCES advisoryai.api_spec (spec_id) ON DELETE CASCADE,
chunk_id TEXT NOT NULL REFERENCES advisoryai.kb_chunk (chunk_id) ON DELETE CASCADE,
service TEXT NOT NULL,
method TEXT NOT NULL,
path TEXT NOT NULL,
operation_id TEXT,
tags TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[],
summary TEXT,
request_json JSONB NOT NULL DEFAULT '{}'::jsonb,
responses_json JSONB NOT NULL DEFAULT '{}'::jsonb,
security_json JSONB NOT NULL DEFAULT '[]'::jsonb,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- An operation is unique per service + HTTP method + path.
CREATE UNIQUE INDEX IF NOT EXISTS idx_api_operation_method_path
ON advisoryai.api_operation (service, method, path);
CREATE INDEX IF NOT EXISTS idx_api_operation_operation_id
ON advisoryai.api_operation (operation_id);
-- One row per doctor diagnostic check, projected for search, each referencing
-- its backing kb_chunk row.
CREATE TABLE IF NOT EXISTS advisoryai.doctor_search_projection
(
check_code TEXT PRIMARY KEY,
chunk_id TEXT NOT NULL REFERENCES advisoryai.kb_chunk (chunk_id) ON DELETE CASCADE,
title TEXT NOT NULL,
severity TEXT NOT NULL,
remediation TEXT NOT NULL,
run_command TEXT NOT NULL,
symptoms TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[],
references_json JSONB NOT NULL DEFAULT '[]'::jsonb,
metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_doctor_projection_severity
ON advisoryai.doctor_search_projection (severity);