Verify live search suggestions against ingested corpus
This commit is contained in:
@@ -20,7 +20,7 @@
|
||||
## Delivery Tracker
|
||||
|
||||
### QA-ZL-001 - Add live corpus preflight and rebuild checks
|
||||
Status: TODO
|
||||
Status: DONE
|
||||
Dependency: none
|
||||
Owners: Test Automation
|
||||
Task description:
|
||||
@@ -28,12 +28,12 @@ Task description:
|
||||
- Fail with explicit setup diagnostics when the corpus is empty or stale instead of producing misleading UI failures.
|
||||
|
||||
Completion criteria:
|
||||
- [ ] The live suite checks rebuild/readiness before suggestion assertions.
|
||||
- [ ] Failure output distinguishes ingestion failure from UI failure.
|
||||
- [ ] Setup docs reference compiled CLI and HTTP rebuild fallbacks.
|
||||
- [x] The live suite checks rebuild/readiness before suggestion assertions.
|
||||
- [x] Failure output distinguishes ingestion failure from UI failure.
|
||||
- [x] Setup docs reference compiled CLI and HTTP rebuild fallbacks.
|
||||
|
||||
### QA-ZL-002 - Prove every surfaced suggestion succeeds
|
||||
Status: TODO
|
||||
Status: DONE
|
||||
Dependency: QA-ZL-001
|
||||
Owners: Test Automation
|
||||
Task description:
|
||||
@@ -41,32 +41,37 @@ Task description:
|
||||
- Include pages that rely on current-scope weighting and overflow fallback.
|
||||
|
||||
Completion criteria:
|
||||
- [ ] The live suite iterates through each surfaced suggestion on the covered pages.
|
||||
- [ ] Every rendered suggestion produces a visible non-dead-end state.
|
||||
- [ ] Previously failing suggestion paths are covered explicitly.
|
||||
- [x] The live suite iterates through each surfaced suggestion on the covered pages.
|
||||
- [x] Every rendered suggestion produces a visible non-dead-end state.
|
||||
- [x] Previously failing suggestion paths are covered explicitly.
|
||||
|
||||
### QA-ZL-003 - Verify search-to-chat consolidation
|
||||
Status: TODO
|
||||
Status: DONE
|
||||
Dependency: QA-ZL-002
|
||||
Owners: Test Automation
|
||||
Task description:
|
||||
- Verify the compact chat launcher and answer-panel handoff preserve query, page context, and evidence after the search redesign.
|
||||
|
||||
Completion criteria:
|
||||
- [ ] Search is the tested primary entry in all covered flows.
|
||||
- [ ] AdvisoryAI opens as a secondary deep-dive from search with inherited context.
|
||||
- [ ] Execution log records the final full-pack commands and outcomes.
|
||||
- [x] Search is the tested primary entry in all covered flows.
|
||||
- [x] AdvisoryAI opens as a secondary deep-dive from search with inherited context.
|
||||
- [x] Execution log records the final full-pack commands and outcomes.
|
||||
|
||||
## Execution Log
|
||||
| Date (UTC) | Update | Owner |
|
||||
| --- | --- | --- |
|
||||
| 2026-03-07 | Sprint created for live corpus-backed suggestion reliability and zero-learning search verification. | Project Manager |
|
||||
| 2026-03-07 | Reproduced the user-facing failure against `http://127.1.0.44`: health was up but `POST /v1/advisory-ai/index/rebuild` returned `documentCount=0`, `chunkCount=0`, and `doctorProjectionCount=0`, so suggestion preflight now treats empty-corpus services as setup failures instead of UI regressions. | Test Automation |
|
||||
| 2026-03-07 | Prepared sources against the repo-controlled service, rebuilt both indexes, and verified live query `database connectivity` returned `contextAnswer.status=grounded` with knowledge cards and citations. | Test Automation |
|
||||
| 2026-03-07 | Ran `npx playwright test tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts --config playwright.config.ts` against `http://127.0.0.1:10451`; result `5/5` passed covering chip viability, every surfaced suggestion, result-open follow-up chips, and Ask-AdvisoryAI handoff. | Test Automation |
|
||||
|
||||
## Decisions & Risks
|
||||
- Decision: live reliability gates are required because static mocks cannot prove suggestion viability against real corpora.
|
||||
- Decision: a healthy service with an empty corpus is an ingestion/setup failure, not a passing baseline; live E2E must fail before UI assertions in that case.
|
||||
- Risk: local environments may have partially ingested or empty corpora, especially in Doctor/knowledge projections.
|
||||
- Mitigation: add explicit corpus preflight and rebuild guidance so the suite fails with actionable diagnostics.
|
||||
- Mitigation: use a repo-controlled local service (`http://127.0.0.1:10451`) with `advisoryai sources prepare`, `POST /v1/advisory-ai/index/rebuild`, and `POST /v1/search/index/rebuild` before running the live suite.
|
||||
|
||||
## Next Checkpoints
|
||||
- 2026-03-09: Land live corpus preflight before broadening the suggestion matrix.
|
||||
- 2026-03-10: Run the final live suggestion pack and capture exact outcomes in the execution log.
|
||||
- 2026-03-09: Broaden live coverage beyond Doctor once findings/policy/VEX ingestion parity is available.
|
||||
- 2026-03-10: Fold the live reliability lane into the consolidated zero-learning search redesign phases.
|
||||
|
||||
@@ -403,7 +403,8 @@ Current live verification coverage:
|
||||
- Rebuild order exercised against a running local service: `POST /v1/advisory-ai/index/rebuild` then `POST /v1/search/index/rebuild`
|
||||
- Verified live query: `database connectivity`
|
||||
- Verified live outcome: response includes `contextAnswer.status = grounded`, citations, and entity cards over ingested data
|
||||
- Verified live suggestion lane: the Doctor-page `database connectivity` chip remains a viable query after rebuild and is exercised by `src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts`
|
||||
- Verified live suggestion lane: `src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts` now preflights corpus readiness, validates suggestion viability, executes every surfaced Doctor suggestion, asserts grounded-or-clarify answer states, verifies follow-up chips after result open, and verifies Ask-AdvisoryAI inherits the live query context
|
||||
- Verified local corpus baseline on 2026-03-07 after `advisoryai sources prepare`: `documentCount = 470`, `chunkCount = 9050`, `apiOperationCount = 2190`, `doctorProjectionCount = 8`
|
||||
- Other routes still rely on deterministic mock-backed Playwright coverage until their ingestion parity is explicitly verified
|
||||
|
||||
Or use the full CI testing stack:
|
||||
|
||||
@@ -61,6 +61,7 @@
|
||||
- Knowledge/domain emptiness should be detectable so the UI can suppress invalid chips.
|
||||
- Empty-state contextual chips and page-owned common-question chips should preflight through the backend viability endpoint before they render.
|
||||
- Live Playwright coverage must assert that every surfaced suggestion returns visible results.
|
||||
- A service health check alone is not enough. On 2026-03-07, `http://127.1.0.44/health` returned `200` while the live knowledge rebuild returned `documentCount=0`; the product still surfaced dead chips. Corpus readiness is the gate, not process liveness.
|
||||
|
||||
## Phase map
|
||||
- Phase 1: FE primary-entry consolidation and removal of explicit search controls.
|
||||
@@ -68,3 +69,4 @@
|
||||
- Phase 3: FE consumption of overflow results and executable suggestion contracts.
|
||||
- Implemented on 2026-03-07: backend `contextAnswer` is now preferred over frontend heuristics, overflow renders as a secondary result section, and suggestion viability preflight suppresses dead chips before they are shown.
|
||||
- Phase 4: Live Playwright reliability matrix with corpus preflight and chip-success guarantees.
|
||||
- Implemented on 2026-03-07: the live suite now rebuilds the active corpus, fails fast on empty knowledge projections, iterates every surfaced Doctor suggestion, and verifies Ask-AdvisoryAI inherits the live search context.
|
||||
|
||||
@@ -7,6 +7,7 @@ const liveSearchBaseUrl = process.env['LIVE_ADVISORYAI_SEARCH_BASE_URL']?.trim()
|
||||
const liveTenant = process.env['LIVE_ADVISORYAI_TENANT']?.trim() || 'test-tenant';
|
||||
const liveScopes = process.env['LIVE_ADVISORYAI_SCOPES']?.trim()
|
||||
|| 'advisory-ai:view advisory-ai:operate advisory-ai:admin';
|
||||
const liveSuggestionSeedQueries = ['database connectivity', 'OIDC readiness'];
|
||||
|
||||
const mockConfig = {
|
||||
authority: {
|
||||
@@ -112,13 +113,14 @@ test.describe('Unified Search - Live contextual suggestions', () => {
|
||||
test.beforeAll(async () => {
|
||||
await ensureLiveServiceHealthy(liveSearchBaseUrl);
|
||||
await rebuildLiveIndexes(liveSearchBaseUrl);
|
||||
await assertLiveSuggestionCoverage(liveSearchBaseUrl, liveSuggestionSeedQueries);
|
||||
});
|
||||
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await setupDoctorPage(page);
|
||||
});
|
||||
|
||||
test('shows automatic suggestion chips when the doctor page opens', async ({ page }) => {
|
||||
test('shows only viable live suggestion chips when the doctor page opens', async ({ page }) => {
|
||||
await routeLiveUnifiedSearch(page);
|
||||
await openDoctor(page);
|
||||
|
||||
@@ -138,6 +140,34 @@ test.describe('Unified Search - Live contextual suggestions', () => {
|
||||
}).first()).toBeVisible();
|
||||
});
|
||||
|
||||
// Clicks every suggestion chip surfaced on the Doctor page, one at a time, and
// requires each one to land in a grounded or clarify answer state.
test('every surfaced doctor suggestion executes into a grounded or clarify state', async ({ page }) => {
  const chipSelector = '.search__suggestions .search__chip';
  const input = page.locator('app-global-search input[type="text"]');

  // Focus the global search input, then wait via the shared waitForResults helper.
  const revealSuggestions = async (): Promise<void> => {
    await input.focus();
    await waitForResults(page);
  };

  await routeLiveUnifiedSearch(page);
  await openDoctor(page);
  await revealSuggestions();

  // Snapshot the chip labels up front; the page is re-opened for every chip below.
  const labels: string[] = [];
  for (const rawLabel of await page.locator(chipSelector).allTextContents()) {
    const label = rawLabel.trim();
    if (label.length > 0) {
      labels.push(label);
    }
  }

  expect(labels.length).toBeGreaterThan(0);

  for (const label of labels) {
    // Start each chip from a freshly opened Doctor page.
    await openDoctor(page);
    await revealSuggestions();

    // Anchored, case-insensitive match so the chip with exactly this label is clicked.
    const exactLabel = new RegExp(`^${escapeRegExp(label)}$`, 'i');
    await page.locator(chipSelector, { hasText: exactLabel }).first().click();

    await expect(input).toHaveValue(label);
    await waitForResults(page);
    await assertNonDeadEndSearch(page, label);
  }
});
|
||||
|
||||
test('clicking a suggestion chip executes a live query and shows a grounded answer', async ({ page }) => {
|
||||
const capturedRequests: Array<Record<string, unknown>> = [];
|
||||
await routeLiveUnifiedSearch(page, capturedRequests);
|
||||
@@ -196,6 +226,31 @@ test.describe('Unified Search - Live contextual suggestions', () => {
|
||||
hasText: /follow up:\s*database connectivity/i,
|
||||
}).first()).toBeVisible();
|
||||
});
|
||||
|
||||
// Runs the "database connectivity" chip, opens AdvisoryAI from the answer panel,
// and checks that the first chat turn carries the query and answer context.
test('answer-panel Ask AdvisoryAI keeps the live query context', async ({ page }) => {
  const turnBodies: Array<Record<string, unknown>> = [];

  await routeLiveUnifiedSearch(page);
  await mockChatConversation(page, turnBodies);
  await openDoctor(page);

  const input = page.locator('app-global-search input[type="text"]');
  await input.focus();
  await waitForResults(page);

  const connectivityChip = page
    .locator('.search__suggestions .search__chip', { hasText: /database connectivity/i })
    .first();
  await connectivityChip.click();

  await expect(input).toHaveValue('database connectivity');
  await waitForResults(page);
  await expect(page.locator('[data-answer-status="grounded"]')).toBeVisible();

  // Hand off from the answer panel to the assistant drawer.
  await page.locator('[data-answer-action="ask-ai"]').click();
  await expect(page.locator('.assistant-drawer')).toBeVisible({ timeout: 10_000 });

  // The mocked turns endpoint records each POSTed body; wait for at least one.
  await expect.poll(() => turnBodies.length).toBeGreaterThan(0);

  const lastTurnContent = String(turnBodies.at(-1)?.['content'] ?? '');
  expect(lastTurnContent).toMatch(/database connectivity/i);
  expect(lastTurnContent).toMatch(/grounded answer|best next step/i);
});
|
||||
});
|
||||
|
||||
async function setupDoctorPage(page: Page): Promise<void> {
|
||||
@@ -311,6 +366,16 @@ async function routeLiveUnifiedSearch(
|
||||
body,
|
||||
});
|
||||
});
|
||||
|
||||
await page.route('**/api/v1/search/suggestions/evaluate', async (route) => {
|
||||
const rawBody = route.request().postData() ?? '{}';
|
||||
const body = await fetchLiveSuggestionViability(rawBody);
|
||||
await route.fulfill({
|
||||
status: 200,
|
||||
contentType: 'application/json',
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
async function ensureLiveServiceHealthy(baseUrl: string): Promise<void> {
|
||||
@@ -345,6 +410,26 @@ async function rebuildLiveIndexes(baseUrl: string): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Preflight check: asks the live service to evaluate the seed queries for the
 * Doctor route and fails unless at least one of them is reported as viable.
 *
 * @param baseUrl Accepted for call-site symmetry with the other preflight
 *   helpers. NOTE(review): it is not used to build the request here; the
 *   request goes through fetchLiveSuggestionViability, which takes only the body.
 * @param queries Seed suggestion queries to evaluate.
 * @throws Error carrying the full serialized payload when no suggestion has `viable === true`.
 */
async function assertLiveSuggestionCoverage(
  baseUrl: string,
  queries: readonly string[],
): Promise<void> {
  const requestBody = JSON.stringify({
    queries,
    ambient: {
      currentRoute: '/ops/operations/doctor',
    },
  });
  const payload = await fetchLiveSuggestionViability(requestBody);

  // Anything other than an array under `suggestions` is treated as "no suggestions".
  const reported = payload['suggestions'];
  const suggestions = Array.isArray(reported)
    ? reported as Array<Record<string, unknown>>
    : [];

  let viableCount = 0;
  for (const suggestion of suggestions) {
    if (suggestion['viable'] === true) {
      viableCount += 1;
    }
  }

  if (viableCount > 0) {
    return;
  }

  throw new Error(`Live suggestion preflight returned no viable queries: ${JSON.stringify(payload)}`);
}
|
||||
|
||||
function safeParseRequest(rawBody: string): Record<string, unknown> {
|
||||
try {
|
||||
const parsed = JSON.parse(rawBody) as Record<string, unknown>;
|
||||
@@ -353,3 +438,218 @@ function safeParseRequest(rawBody: string): Record<string, unknown> {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * POSTs a suggestion-viability request to the live search service and returns
 * the parsed response.
 *
 * - 2xx: the response text is parsed with safeParseRequest and returned.
 * - 404: the evaluate endpoint is treated as unavailable and the answer is
 *   assembled by buildCompatibilitySuggestionViability from the parsed request.
 * - any other status: an Error naming the status is thrown.
 *
 * @param rawBody JSON request body, forwarded verbatim to the evaluate endpoint.
 * @returns The viability payload as a plain record.
 */
async function fetchLiveSuggestionViability(rawBody: string): Promise<Record<string, unknown>> {
  const requestHeaders = {
    'content-type': 'application/json',
    'x-stellaops-scopes': liveScopes,
    'x-stellaops-tenant': liveTenant,
    'x-stellaops-actor': 'playwright-live',
  };

  const evaluation = await fetch(`${liveSearchBaseUrl}/v1/search/suggestions/evaluate`, {
    method: 'POST',
    headers: requestHeaders,
    body: rawBody,
  });

  // Only a 404 is allowed to fall through to the compatibility path.
  if (!evaluation.ok && evaluation.status !== 404) {
    throw new Error(`Live suggestion preflight failed with status ${evaluation.status}.`);
  }

  if (!evaluation.ok) {
    return buildCompatibilitySuggestionViability(safeParseRequest(rawBody), requestHeaders);
  }

  return safeParseRequest(await evaluation.text());
}
|
||||
|
||||
/**
 * Compatibility path for services without the suggestion-evaluate endpoint:
 * runs each requested query through `/v1/search/query` (one at a time, in
 * order) and derives a viability entry from the response.
 *
 * A suggestion is marked viable when the response has at least one card
 * (primary or overflow) or when the context answer status is `clarify`.
 *
 * @param requestBody Parsed evaluate request; `queries`, `filters` and `ambient` are read.
 * @param headers Headers sent with every search query.
 * @returns `{ suggestions, coverage }`, where coverage is merged across all queries via mergeCoverage.
 * @throws Error when any search query responds with a non-2xx status.
 */
async function buildCompatibilitySuggestionViability(
  requestBody: Record<string, unknown>,
  headers: Record<string, string>,
): Promise<Record<string, unknown>> {
  // Narrowing helpers for the untyped JSON payloads.
  const asRecord = (value: unknown): Record<string, unknown> | null =>
    value && typeof value === 'object' ? value as Record<string, unknown> : null;
  const asRecordList = (value: unknown): Array<Record<string, unknown>> =>
    Array.isArray(value) ? value as Array<Record<string, unknown>> : [];

  // Normalise the requested queries: stringify, trim, drop blanks.
  const requestedQueries: unknown[] = Array.isArray(requestBody['queries']) ? requestBody['queries'] : [];
  const queries: string[] = [];
  for (const requested of requestedQueries) {
    const normalised = String(requested ?? '').trim();
    if (normalised.length > 0) {
      queries.push(normalised);
    }
  }

  const filters = requestBody['filters'];
  const ambient = requestBody['ambient'];
  const suggestions: Array<Record<string, unknown>> = [];
  let mergedCoverage: Record<string, unknown> | null = null;

  for (const query of queries) {
    const searchBody = JSON.stringify({
      q: query,
      k: 5,
      includeSynthesis: false,
      filters,
      ambient,
    });
    const response = await fetch(`${liveSearchBaseUrl}/v1/search/query`, {
      method: 'POST',
      headers,
      body: searchBody,
    });

    if (!response.ok) {
      throw new Error(`Compatibility suggestion query failed for "${query}" with status ${response.status}.`);
    }

    const payload = safeParseRequest(await response.text());
    const primaryCards = asRecordList(payload['cards']);
    const overflowCards = asRecordList(asRecord(payload['overflow'])?.['cards']);
    const contextAnswer = asRecord(payload['contextAnswer']);
    const coverage = asRecord(payload['coverage']);

    const cardCount = primaryCards.length + overflowCards.length;
    const status = String(contextAnswer?.['status'] ?? 'insufficient');
    // First available domain: top primary card, then top overflow card, then the coverage scope.
    const leadingDomain = String(
      primaryCards[0]?.['domain'] ?? overflowCards[0]?.['domain'] ?? coverage?.['currentScopeDomain'] ?? '',
    );

    suggestions.push({
      query,
      viable: cardCount > 0 || status === 'clarify',
      status,
      code: String(contextAnswer?.['code'] ?? 'no_grounded_evidence'),
      cardCount,
      leadingDomain: leadingDomain || undefined,
      reason: String(contextAnswer?.['reason'] ?? 'No grounded evidence matched the suggestion in the active corpus.'),
    });

    mergedCoverage = mergeCoverage(mergedCoverage, coverage);
  }

  return {
    suggestions,
    coverage: mergedCoverage,
  };
}
|
||||
|
||||
/**
 * Folds the coverage record of one search response into an accumulated
 * coverage summary.
 *
 * - If either side is null, the other side is returned unchanged (same reference).
 * - Domains are keyed by their `domain` string. Entries with an empty or missing
 *   key, or entries that are not objects, are skipped.
 * - When a domain appears on both sides, numeric fields take the maximum and
 *   boolean flags are OR-ed. A domain seen only once is kept as-is.
 * - `currentScopeDomain` is the first non-empty value, accumulated side first.
 *   An empty string counts as "unknown" and no longer masks a later non-empty
 *   scope.
 *
 * @param current Coverage accumulated so far, or null if none yet.
 * @param next Coverage from the latest response, or null if it had none.
 * @returns The merged coverage, or null when both inputs are null.
 */
function mergeCoverage(
  current: Record<string, unknown> | null,
  next: Record<string, unknown> | null,
): Record<string, unknown> | null {
  if (!next) {
    return current;
  }

  if (!current) {
    return next;
  }

  const currentDomains = Array.isArray(current['domains']) ? current['domains'] as Array<Record<string, unknown>> : [];
  const nextDomains = Array.isArray(next['domains']) ? next['domains'] as Array<Record<string, unknown>> : [];
  const mergedDomainsByKey = new Map<string, Record<string, unknown>>();

  for (const domain of [...currentDomains, ...nextDomains]) {
    // The arrays come from untyped JSON; tolerate null/primitive entries instead of throwing.
    if (!domain || typeof domain !== 'object') {
      continue;
    }

    const key = String(domain['domain'] ?? '');
    if (!key) {
      continue;
    }

    const existing = mergedDomainsByKey.get(key);
    if (!existing) {
      mergedDomainsByKey.set(key, domain);
      continue;
    }

    mergedDomainsByKey.set(key, {
      domain: key,
      candidateCount: Math.max(Number(existing['candidateCount'] ?? 0), Number(domain['candidateCount'] ?? 0)),
      visibleCardCount: Math.max(Number(existing['visibleCardCount'] ?? 0), Number(domain['visibleCardCount'] ?? 0)),
      topScore: Math.max(Number(existing['topScore'] ?? 0), Number(domain['topScore'] ?? 0)),
      isCurrentScope: Boolean(existing['isCurrentScope']) || Boolean(domain['isCurrentScope']),
      hasVisibleResults: Boolean(existing['hasVisibleResults']) || Boolean(domain['hasVisibleResults']),
    });
  }

  // Prefer the accumulated scope, but fall back to the newer one when the
  // accumulated value is missing or empty.
  const currentScope = String(current['currentScopeDomain'] ?? '');
  const nextScope = String(next['currentScopeDomain'] ?? '');

  return {
    currentScopeDomain: currentScope || nextScope,
    currentScopeWeighted: Boolean(current['currentScopeWeighted']) || Boolean(next['currentScopeWeighted']),
    domains: Array.from(mergedDomainsByKey.values()),
  };
}
|
||||
|
||||
/**
 * Waits until the first `[data-answer-status]` element reports `grounded` or
 * `clarify`. When the settled status is `grounded`, it additionally waits for
 * entity cards via waitForEntityCards(page, 1).
 *
 * @param page Playwright page showing the search results.
 * @param suggestionText Suggestion label, used only in the failure message.
 */
async function assertNonDeadEndSearch(page: Page, suggestionText: string): Promise<void> {
  const readAnswerStatus = (): Promise<string | null> =>
    page.locator('[data-answer-status]').first().getAttribute('data-answer-status');

  // Poll maps every non-accepted status to '' so the assertion only passes on grounded/clarify.
  await expect
    .poll(async () => {
      const observed = await readAnswerStatus();
      return observed === 'grounded' || observed === 'clarify' ? observed : '';
    }, {
      message: `Expected "${suggestionText}" to resolve into a grounded or clarify answer.`,
    })
    .not.toBe('');

  // Re-read after polling: only grounded answers are followed by the entity-card wait.
  const settledStatus = await readAnswerStatus();
  if (settledStatus === 'grounded') {
    await waitForEntityCards(page, 1);
  }
}
|
||||
|
||||
/**
 * Installs Playwright routes that stub the AdvisoryAI conversation API.
 *
 * - `**&#47;conversations`: POST returns a fixed empty conversation; any other
 *   method returns an empty JSON list.
 * - `**&#47;conversations/*&#47;turns`: POST bodies are appended to
 *   `capturedTurnBodies` and answered with a canned server-sent-event stream
 *   (progress, token, done); other methods are passed through.
 *
 * @param page Playwright page to attach the routes to.
 * @param capturedTurnBodies Array that receives the JSON body of every POSTed turn.
 */
async function mockChatConversation(
  page: Page,
  capturedTurnBodies: Array<Record<string, unknown>>,
): Promise<void> {
  const jsonOk = (payload: unknown) => ({
    status: 200,
    contentType: 'application/json',
    body: JSON.stringify(payload),
  });

  await page.route('**/api/v1/advisory-ai/conversations', async (route) => {
    const isCreate = route.request().method() === 'POST';
    if (isCreate) {
      return route.fulfill(jsonOk({
        conversationId: 'conv-live-context-1',
        tenantId: liveTenant,
        userId: 'playwright-live',
        context: {},
        turns: [],
        createdAt: '2026-03-07T00:00:00.000Z',
        updatedAt: '2026-03-07T00:00:00.000Z',
      }));
    }

    // Non-POST requests get an empty list.
    return route.fulfill(jsonOk([]));
  });

  await page.route('**/api/v1/advisory-ai/conversations/*/turns', async (route) => {
    const request = route.request();
    if (request.method() !== 'POST') {
      return route.continue();
    }

    const turnBody = request.postDataJSON() as Record<string, unknown> | null;
    capturedTurnBodies.push(turnBody ?? {});

    // Each frame is an "event:" line plus a "data:" line; frames are separated by a blank line.
    const frames = [
      ['event: progress', 'data: {"stage":"searching"}'],
      ['event: token', 'data: {"content":"I can expand the grounded answer and recommend the next step."}'],
      ['event: done', 'data: {"turnId":"turn-live-context-1","groundingScore":0.93}'],
    ];
    const events = frames.map((frame) => `${frame.join('\n')}\n`).join('\n');

    return route.fulfill({
      status: 200,
      headers: {
        'content-type': 'text/event-stream; charset=utf-8',
        'cache-control': 'no-cache',
      },
      body: events,
    });
  });
}
|
||||
|
||||
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 *
 * @param value Text to escape.
 * @returns The escaped text; input without metacharacters is returned unchanged.
 */
function escapeRegExp(value: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (matched) => `\\${matched}`);
}
|
||||
|
||||
Reference in New Issue
Block a user