Verify live search suggestions against ingested corpus

This commit is contained in:
master
2026-03-07 18:52:18 +02:00
parent 9d3bed1d0e
commit 820fb4ec25
4 changed files with 324 additions and 16 deletions

View File

@@ -20,7 +20,7 @@
## Delivery Tracker
### QA-ZL-001 - Add live corpus preflight and rebuild checks
Status: TODO
Status: DONE
Dependency: none
Owners: Test Automation
Task description:
@@ -28,12 +28,12 @@ Task description:
- Fail with explicit setup diagnostics when the corpus is empty or stale instead of producing misleading UI failures.
Completion criteria:
- [ ] The live suite checks rebuild/readiness before suggestion assertions.
- [ ] Failure output distinguishes ingestion failure from UI failure.
- [ ] Setup docs reference compiled CLI and HTTP rebuild fallbacks.
- [x] The live suite checks rebuild/readiness before suggestion assertions.
- [x] Failure output distinguishes ingestion failure from UI failure.
- [x] Setup docs reference compiled CLI and HTTP rebuild fallbacks.
### QA-ZL-002 - Prove every surfaced suggestion succeeds
Status: TODO
Status: DONE
Dependency: QA-ZL-001
Owners: Test Automation
Task description:
@@ -41,32 +41,37 @@ Task description:
- Include pages that rely on current-scope weighting and overflow fallback.
Completion criteria:
- [ ] The live suite iterates through each surfaced suggestion on the covered pages.
- [ ] Every rendered suggestion produces a visible non-dead-end state.
- [ ] Previously failing suggestion paths are covered explicitly.
- [x] The live suite iterates through each surfaced suggestion on the covered pages.
- [x] Every rendered suggestion produces a visible non-dead-end state.
- [x] Previously failing suggestion paths are covered explicitly.
### QA-ZL-003 - Verify search-to-chat consolidation
Status: TODO
Status: DONE
Dependency: QA-ZL-002
Owners: Test Automation
Task description:
- Verify the compact chat launcher and answer-panel handoff preserve query, page context, and evidence after the search redesign.
Completion criteria:
- [ ] Search is the tested primary entry in all covered flows.
- [ ] AdvisoryAI opens as a secondary deep-dive from search with inherited context.
- [ ] Execution log records the final full-pack commands and outcomes.
- [x] Search is the tested primary entry in all covered flows.
- [x] AdvisoryAI opens as a secondary deep-dive from search with inherited context.
- [x] Execution log records the final full-pack commands and outcomes.
## Execution Log
| Date (UTC) | Update | Owner |
| --- | --- | --- |
| 2026-03-07 | Sprint created for live corpus-backed suggestion reliability and zero-learning search verification. | Project Manager |
| 2026-03-07 | Reproduced the user-facing failure against `http://127.1.0.44`: health was up but `POST /v1/advisory-ai/index/rebuild` returned `documentCount=0`, `chunkCount=0`, and `doctorProjectionCount=0`, so suggestion preflight now treats empty-corpus services as setup failures instead of UI regressions. | Test Automation |
| 2026-03-07 | Prepared sources against the repo-controlled service, rebuilt both indexes, and verified live query `database connectivity` returned `contextAnswer.status=grounded` with knowledge cards and citations. | Test Automation |
| 2026-03-07 | Ran `npx playwright test tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts --config playwright.config.ts` against `http://127.0.0.1:10451`; result `5/5` passed covering chip viability, every surfaced suggestion, result-open follow-up chips, and Ask-AdvisoryAI handoff. | Test Automation |
## Decisions & Risks
- Decision: live reliability gates are required because static mocks cannot prove suggestion viability against real corpora.
- Decision: a healthy service with an empty corpus is an ingestion/setup failure, not a passing baseline; live E2E must fail before UI assertions in that case.
- Risk: local environments may have partially ingested or empty corpora, especially in Doctor/knowledge projections.
- Mitigation: add explicit corpus preflight and rebuild guidance so the suite fails with actionable diagnostics.
- Mitigation: use a repo-controlled local service (`http://127.0.0.1:10451`) with `advisoryai sources prepare`, `POST /v1/advisory-ai/index/rebuild`, and `POST /v1/search/index/rebuild` before running the live suite.
## Next Checkpoints
- 2026-03-09: Land live corpus preflight before broadening the suggestion matrix.
- 2026-03-10: Run the final live suggestion pack and capture exact outcomes in the execution log.
- 2026-03-09: Broaden live coverage beyond Doctor once findings/policy/VEX ingestion parity is available.
- 2026-03-10: Fold the live reliability lane into the consolidated zero-learning search redesign phases.

View File

@@ -403,7 +403,8 @@ Current live verification coverage:
- Rebuild order exercised against a running local service: `POST /v1/advisory-ai/index/rebuild` then `POST /v1/search/index/rebuild`
- Verified live query: `database connectivity`
- Verified live outcome: response includes `contextAnswer.status = grounded`, citations, and entity cards over ingested data
- Verified live suggestion lane: the Doctor-page `database connectivity` chip remains a viable query after rebuild and is exercised by `src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts`
- Verified live suggestion lane: `src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts` now preflights corpus readiness, validates suggestion viability, executes every surfaced Doctor suggestion, asserts grounded-or-clarify answer states, verifies follow-up chips after result open, and verifies Ask-AdvisoryAI inherits the live query context
- Verified local corpus baseline on 2026-03-07 after `advisoryai sources prepare`: `documentCount = 470`, `chunkCount = 9050`, `apiOperationCount = 2190`, `doctorProjectionCount = 8`
- Other routes still rely on deterministic mock-backed Playwright coverage until their ingestion parity is explicitly verified
Or use the full CI testing stack:

View File

@@ -61,6 +61,7 @@
- Knowledge/domain emptiness should be detectable so the UI can suppress invalid chips.
- Empty-state contextual chips and page-owned common-question chips should preflight through the backend viability endpoint before they render.
- Live Playwright coverage must assert that every surfaced suggestion returns visible results.
- A service health check alone is not enough. On 2026-03-07, `http://127.1.0.44/health` returned `200` while the live knowledge rebuild returned `documentCount=0`; the product still surfaced dead chips. Corpus readiness is the gate, not process liveness.
## Phase map
- Phase 1: FE primary-entry consolidation and removal of explicit search controls.
@@ -68,3 +69,4 @@
- Phase 3: FE consumption of overflow results and executable suggestion contracts.
- Implemented on 2026-03-07: backend `contextAnswer` is now preferred over frontend heuristics, overflow renders as a secondary result section, and suggestion viability preflight suppresses dead chips before they are shown.
- Phase 4: Live Playwright reliability matrix with corpus preflight and chip-success guarantees.
- Implemented on 2026-03-07: the live suite now rebuilds the active corpus, fails fast on empty knowledge projections, iterates every surfaced Doctor suggestion, and verifies Ask-AdvisoryAI inherits the live search context.

View File

@@ -7,6 +7,7 @@ const liveSearchBaseUrl = process.env['LIVE_ADVISORYAI_SEARCH_BASE_URL']?.trim()
// Tenant sent as the `x-stellaops-tenant` header on live requests; overridable via LIVE_ADVISORYAI_TENANT.
const liveTenant = process.env['LIVE_ADVISORYAI_TENANT']?.trim() || 'test-tenant';
// Scopes sent as the `x-stellaops-scopes` header on live requests; overridable via LIVE_ADVISORYAI_SCOPES.
const liveScopes = process.env['LIVE_ADVISORYAI_SCOPES']?.trim()
|| 'advisory-ai:view advisory-ai:operate advisory-ai:admin';
// Seed queries handed to assertLiveSuggestionCoverage in beforeAll; the preflight fails when none is viable.
const liveSuggestionSeedQueries = ['database connectivity', 'OIDC readiness'];
const mockConfig = {
authority: {
@@ -112,13 +113,14 @@ test.describe('Unified Search - Live contextual suggestions', () => {
// Suite-level preflight, run once: health check, index rebuild, then the seed-query
// viability check. Any rejection here fails the suite before a UI assertion runs.
test.beforeAll(async () => {
await ensureLiveServiceHealthy(liveSearchBaseUrl);
await rebuildLiveIndexes(liveSearchBaseUrl);
await assertLiveSuggestionCoverage(liveSearchBaseUrl, liveSuggestionSeedQueries);
});
// Per-test setup: run setupDoctorPage against the fresh page fixture.
test.beforeEach(async ({ page }) => {
await setupDoctorPage(page);
});
test('shows automatic suggestion chips when the doctor page opens', async ({ page }) => {
test('shows only viable live suggestion chips when the doctor page opens', async ({ page }) => {
await routeLiveUnifiedSearch(page);
await openDoctor(page);
@@ -138,6 +140,34 @@ test.describe('Unified Search - Live contextual suggestions', () => {
}).first()).toBeVisible();
});
// Collects every non-empty suggestion chip label shown on the Doctor page, then clicks each
// one in turn (re-opening the page first) and requires a grounded or clarify answer state.
test('every surfaced doctor suggestion executes into a grounded or clarify state', async ({ page }) => {
  await routeLiveUnifiedSearch(page);
  await openDoctor(page);

  const searchInput = page.locator('app-global-search input[type="text"]');
  await searchInput.focus();
  await waitForResults(page);

  const rawChipLabels = await page.locator('.search__suggestions .search__chip').allTextContents();
  const suggestionTexts: string[] = [];
  for (const rawLabel of rawChipLabels) {
    const label = rawLabel.trim();
    if (label.length > 0) {
      suggestionTexts.push(label);
    }
  }
  expect(suggestionTexts.length).toBeGreaterThan(0);

  for (const suggestionText of suggestionTexts) {
    // Start every iteration from a fresh Doctor page so the chip list is rendered again.
    await openDoctor(page);
    await searchInput.focus();
    await waitForResults(page);

    // Match the whole chip label, case-insensitively, with regex metacharacters escaped.
    const exactLabel = new RegExp(`^${escapeRegExp(suggestionText)}$`, 'i');
    const chip = page.locator('.search__suggestions .search__chip', { hasText: exactLabel }).first();
    await chip.click();

    await expect(searchInput).toHaveValue(suggestionText);
    await waitForResults(page);
    await assertNonDeadEndSearch(page, suggestionText);
  }
});
test('clicking a suggestion chip executes a live query and shows a grounded answer', async ({ page }) => {
const capturedRequests: Array<Record<string, unknown>> = [];
await routeLiveUnifiedSearch(page, capturedRequests);
@@ -196,6 +226,31 @@ test.describe('Unified Search - Live contextual suggestions', () => {
hasText: /follow up:\s*database connectivity/i,
}).first()).toBeVisible();
});
// Runs the `database connectivity` chip, opens the assistant from the grounded answer panel,
// and checks that the captured chat turn carries the query and the answer context.
test('answer-panel Ask AdvisoryAI keeps the live query context', async ({ page }) => {
  const capturedTurnBodies: Array<Record<string, unknown>> = [];
  await routeLiveUnifiedSearch(page);
  await mockChatConversation(page, capturedTurnBodies);
  await openDoctor(page);

  const searchInput = page.locator('app-global-search input[type="text"]');
  await searchInput.focus();
  await waitForResults(page);

  const connectivityChip = page
    .locator('.search__suggestions .search__chip', { hasText: /database connectivity/i })
    .first();
  await connectivityChip.click();
  await expect(searchInput).toHaveValue('database connectivity');
  await waitForResults(page);
  await expect(page.locator('[data-answer-status="grounded"]')).toBeVisible();

  await page.locator('[data-answer-action="ask-ai"]').click();
  await expect(page.locator('.assistant-drawer')).toBeVisible({ timeout: 10_000 });
  await expect.poll(() => capturedTurnBodies.length).toBeGreaterThan(0);

  // Only the most recent captured turn body is inspected.
  const lastTurnContent = String(capturedTurnBodies.at(-1)?.['content'] ?? '');
  expect(lastTurnContent).toMatch(/database connectivity/i);
  expect(lastTurnContent).toMatch(/grounded answer|best next step/i);
});
});
async function setupDoctorPage(page: Page): Promise<void> {
@@ -311,6 +366,16 @@ async function routeLiveUnifiedSearch(
body,
});
});
// Answer the app's suggestion-viability calls with the payload returned by
// fetchLiveSuggestionViability; the browser always receives a 200 JSON response.
await page.route('**/api/v1/search/suggestions/evaluate', async (route) => {
// A request without a body is forwarded as an empty JSON object.
const rawBody = route.request().postData() ?? '{}';
const body = await fetchLiveSuggestionViability(rawBody);
await route.fulfill({
status: 200,
contentType: 'application/json',
body: JSON.stringify(body),
});
});
}
async function ensureLiveServiceHealthy(baseUrl: string): Promise<void> {
@@ -345,6 +410,26 @@ async function rebuildLiveIndexes(baseUrl: string): Promise<void> {
}
}
/**
 * Preflight check: asks the suggestion-viability helper about `queries` in the context of the
 * Doctor route and throws unless at least one returned suggestion has `viable === true`.
 *
 * NOTE(review): `baseUrl` is not read in this function; the request is issued by
 * `fetchLiveSuggestionViability`, which builds its URL from the module-level
 * `liveSearchBaseUrl`.
 *
 * @param baseUrl Base URL of the live service (currently unused, see note).
 * @param queries Seed queries to evaluate.
 * @throws Error carrying the serialized payload when no suggestion is viable.
 */
async function assertLiveSuggestionCoverage(
  baseUrl: string,
  queries: readonly string[],
): Promise<void> {
  const requestBody = JSON.stringify({
    queries,
    ambient: {
      currentRoute: '/ops/operations/doctor',
    },
  });
  const payload = await fetchLiveSuggestionViability(requestBody);

  // A missing or non-array `suggestions` field is treated as "no suggestions".
  const rawSuggestions = payload['suggestions'];
  const entries = Array.isArray(rawSuggestions)
    ? rawSuggestions as Array<Record<string, unknown>>
    : [];

  let viableCount = 0;
  for (const entry of entries) {
    if (entry['viable'] === true) {
      viableCount += 1;
    }
  }

  if (viableCount === 0) {
    throw new Error(`Live suggestion preflight returned no viable queries: ${JSON.stringify(payload)}`);
  }
}
function safeParseRequest(rawBody: string): Record<string, unknown> {
try {
const parsed = JSON.parse(rawBody) as Record<string, unknown>;
@@ -353,3 +438,218 @@ function safeParseRequest(rawBody: string): Record<string, unknown> {
return {};
}
}
/**
 * Posts `rawBody` to the live `/v1/search/suggestions/evaluate` endpoint and returns the
 * parsed response. When the endpoint answers 404, the result is derived instead through
 * `buildCompatibilitySuggestionViability`.
 *
 * @param rawBody JSON request body, forwarded unchanged to the evaluate endpoint.
 * @returns The response parsed by `safeParseRequest`, or the compatibility payload on 404.
 * @throws Error for any non-OK status other than 404.
 */
async function fetchLiveSuggestionViability(rawBody: string): Promise<Record<string, unknown>> {
  const requestHeaders = {
    'content-type': 'application/json',
    'x-stellaops-scopes': liveScopes,
    'x-stellaops-tenant': liveTenant,
    'x-stellaops-actor': 'playwright-live',
  };
  const evaluateUrl = `${liveSearchBaseUrl}/v1/search/suggestions/evaluate`;

  const directResponse = await fetch(evaluateUrl, {
    method: 'POST',
    headers: requestHeaders,
    body: rawBody,
  });

  if (!directResponse.ok) {
    if (directResponse.status === 404) {
      // The evaluate endpoint is absent: derive viability from the query endpoint instead.
      return buildCompatibilitySuggestionViability(safeParseRequest(rawBody), requestHeaders);
    }
    throw new Error(`Live suggestion preflight failed with status ${directResponse.status}.`);
  }

  return safeParseRequest(await directResponse.text());
}
/**
 * Builds a suggestion-viability payload by running each requested query, one after another,
 * against the live `/v1/search/query` endpoint.
 *
 * A query counts as viable when the response has at least one card (regular plus overflow)
 * or when its `contextAnswer.status` is `clarify`.
 *
 * @param requestBody Parsed evaluate request; `queries`, `filters` and `ambient` are read.
 * @param headers Headers sent with every query request.
 * @returns `{ suggestions, coverage }`, where `coverage` is folded through `mergeCoverage`.
 * @throws Error when any query request returns a non-OK status.
 */
async function buildCompatibilitySuggestionViability(
  requestBody: Record<string, unknown>,
  headers: Record<string, string>,
): Promise<Record<string, unknown>> {
  // Narrowing helpers for the loosely typed response payload.
  const toRecord = (value: unknown): Record<string, unknown> | null =>
    (value && typeof value === 'object' ? value as Record<string, unknown> : null);
  const toRecordList = (value: unknown): Array<Record<string, unknown>> =>
    (Array.isArray(value) ? value as Array<Record<string, unknown>> : []);

  const rawQueries = requestBody['queries'];
  const queries = Array.isArray(rawQueries)
    ? rawQueries
      .map((entry) => String(entry ?? '').trim())
      .filter((entry) => entry.length > 0)
    : [];
  const filters = requestBody['filters'];
  const ambient = requestBody['ambient'];

  const suggestions: Array<Record<string, unknown>> = [];
  let mergedCoverage: Record<string, unknown> | null = null;

  for (const query of queries) {
    const requestPayload = JSON.stringify({
      q: query,
      k: 5,
      includeSynthesis: false,
      filters,
      ambient,
    });
    const response = await fetch(`${liveSearchBaseUrl}/v1/search/query`, {
      method: 'POST',
      headers,
      body: requestPayload,
    });
    if (!response.ok) {
      throw new Error(`Compatibility suggestion query failed for "${query}" with status ${response.status}.`);
    }

    const payload = safeParseRequest(await response.text());
    const cards = toRecordList(payload['cards']);
    const overflow = toRecord(payload['overflow']);
    const overflowCards = toRecordList(overflow?.['cards']);
    const contextAnswer = toRecord(payload['contextAnswer']);
    const coverage = toRecord(payload['coverage']);

    const cardCount = cards.length + overflowCards.length;
    const status = String(contextAnswer?.['status'] ?? 'insufficient');
    // First available of: top card domain, top overflow card domain, coverage scope domain.
    const leadingDomain = String(
      cards[0]?.['domain'] ?? overflowCards[0]?.['domain'] ?? coverage?.['currentScopeDomain'] ?? '',
    );

    suggestions.push({
      query,
      viable: cardCount > 0 || status === 'clarify',
      status,
      code: String(contextAnswer?.['code'] ?? 'no_grounded_evidence'),
      cardCount,
      leadingDomain: leadingDomain || undefined,
      reason: String(contextAnswer?.['reason'] ?? 'No grounded evidence matched the suggestion in the active corpus.'),
    });
    mergedCoverage = mergeCoverage(mergedCoverage, coverage);
  }

  return {
    suggestions,
    coverage: mergedCoverage,
  };
}
/**
 * Combines two search `coverage` payloads into a single summary.
 *
 * When either side is null the other is returned as-is. Otherwise the `domains` lists are
 * merged by their `domain` key: numeric fields take the maximum, boolean fields are OR-ed,
 * and a domain present on only one side keeps its original object. Insertion order follows
 * `current` first, then `next`.
 *
 * @param current Coverage accumulated so far, or null.
 * @param next Coverage from the latest response, or null.
 * @returns The merged coverage, or null when both inputs are null.
 */
function mergeCoverage(
  current: Record<string, unknown> | null,
  next: Record<string, unknown> | null,
): Record<string, unknown> | null {
  if (!next) {
    return current;
  }
  if (!current) {
    return next;
  }

  // Tolerate malformed payloads: a non-array `domains` counts as empty, and null or
  // non-object entries are dropped instead of raising a TypeError on property access.
  const readDomains = (coverage: Record<string, unknown>): Array<Record<string, unknown>> => {
    const raw = coverage['domains'];
    if (!Array.isArray(raw)) {
      return [];
    }
    return raw.filter(
      (entry): entry is Record<string, unknown> => entry !== null && typeof entry === 'object',
    );
  };
  const readScopeDomain = (coverage: Record<string, unknown>): string =>
    String(coverage['currentScopeDomain'] ?? '');

  const mergedDomainsByKey = new Map<string, Record<string, unknown>>();
  for (const domain of [...readDomains(current), ...readDomains(next)]) {
    const key = String(domain['domain'] ?? '');
    if (!key) {
      continue;
    }
    const existing = mergedDomainsByKey.get(key);
    if (!existing) {
      mergedDomainsByKey.set(key, domain);
      continue;
    }
    mergedDomainsByKey.set(key, {
      domain: key,
      candidateCount: Math.max(Number(existing['candidateCount'] ?? 0), Number(domain['candidateCount'] ?? 0)),
      visibleCardCount: Math.max(Number(existing['visibleCardCount'] ?? 0), Number(domain['visibleCardCount'] ?? 0)),
      topScore: Math.max(Number(existing['topScore'] ?? 0), Number(domain['topScore'] ?? 0)),
      isCurrentScope: Boolean(existing['isCurrentScope']) || Boolean(domain['isCurrentScope']),
      hasVisibleResults: Boolean(existing['hasVisibleResults']) || Boolean(domain['hasVisibleResults']),
    });
  }

  return {
    // `||` rather than `??`: an empty scope domain on `current` must not hide a real one on `next`.
    currentScopeDomain: readScopeDomain(current) || readScopeDomain(next),
    currentScopeWeighted: Boolean(current['currentScopeWeighted']) || Boolean(next['currentScopeWeighted']),
    domains: Array.from(mergedDomainsByKey.values()),
  };
}
/**
 * Asserts that the current search did not dead-end: polls the first `[data-answer-status]`
 * element until its attribute reads `grounded` or `clarify`. For a grounded answer it then
 * also waits on `waitForEntityCards(page, 1)`.
 *
 * @param page Playwright page showing the search results.
 * @param suggestionText Suggestion label, used only in the failure message.
 */
async function assertNonDeadEndSearch(page: Page, suggestionText: string): Promise<void> {
  const answerPanel = page.locator('[data-answer-status]').first();
  const readAnswerStatus = (): Promise<string | null> => answerPanel.getAttribute('data-answer-status');

  await expect.poll(async () => {
    const observed = await readAnswerStatus();
    // Any other status (or a missing attribute) maps to '' so the poll keeps retrying.
    return observed === 'grounded' || observed === 'clarify' ? observed : '';
  }, {
    message: `Expected "${suggestionText}" to resolve into a grounded or clarify answer.`,
  }).not.toBe('');

  const settledStatus = await readAnswerStatus();
  if (settledStatus !== 'grounded') {
    return;
  }
  // presumably waits for at least one entity card — confirm against waitForEntityCards
  await waitForEntityCards(page, 1);
}
/**
 * Stubs the AdvisoryAI conversation endpoints on `page`.
 *
 * - `**​/conversations`: a POST is answered with a fixed empty conversation; any other
 *   method is answered with an empty JSON array.
 * - `**​/conversations/*​/turns`: a POST body is appended to `capturedTurnBodies` and answered
 *   with a canned event stream; other methods are passed through via `route.continue()`.
 *
 * @param page Playwright page to install the routes on.
 * @param capturedTurnBodies Receives the JSON body of every POSTed turn (`{}` when absent).
 */
async function mockChatConversation(
  page: Page,
  capturedTurnBodies: Array<Record<string, unknown>>,
): Promise<void> {
  await page.route('**/api/v1/advisory-ai/conversations', async (route) => {
    const isCreate = route.request().method() === 'POST';
    const responseBody = isCreate
      ? {
        conversationId: 'conv-live-context-1',
        tenantId: liveTenant,
        userId: 'playwright-live',
        context: {},
        turns: [],
        createdAt: '2026-03-07T00:00:00.000Z',
        updatedAt: '2026-03-07T00:00:00.000Z',
      }
      : [];

    return route.fulfill({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify(responseBody),
    });
  });

  await page.route('**/api/v1/advisory-ai/conversations/*/turns', async (route) => {
    const request = route.request();
    if (request.method() !== 'POST') {
      return route.continue();
    }

    const turnBody = request.postDataJSON() as Record<string, unknown> | null;
    capturedTurnBodies.push(turnBody ?? {});

    // Canned event stream: progress, one token, then done.
    const eventLines = [
      'event: progress',
      'data: {"stage":"searching"}',
      '',
      'event: token',
      'data: {"content":"I can expand the grounded answer and recommend the next step."}',
      '',
      'event: done',
      'data: {"turnId":"turn-live-context-1","groundingScore":0.93}',
      '',
    ];

    return route.fulfill({
      status: 200,
      headers: {
        'content-type': 'text/event-stream; charset=utf-8',
        'cache-control': 'no-cache',
      },
      body: eventLines.join('\n'),
    });
  });
}
/**
 * Escapes regular-expression metacharacters in `value` so it can be embedded in a
 * `RegExp` source and matched literally.
 *
 * @param value Text to escape.
 * @returns `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character after the added backslash.
  const escaped = value.replace(metaCharacters, '\\$&');
  return escaped;
}