diff --git a/docs/implplan/SPRINT_20260307_021_FE_live_search_suggestion_reliability_matrix.md b/docs/implplan/SPRINT_20260307_021_FE_live_search_suggestion_reliability_matrix.md index eb66f716f..b5f2f94bc 100644 --- a/docs/implplan/SPRINT_20260307_021_FE_live_search_suggestion_reliability_matrix.md +++ b/docs/implplan/SPRINT_20260307_021_FE_live_search_suggestion_reliability_matrix.md @@ -20,7 +20,7 @@ ## Delivery Tracker ### QA-ZL-001 - Add live corpus preflight and rebuild checks -Status: TODO +Status: DONE Dependency: none Owners: Test Automation Task description: @@ -28,12 +28,12 @@ Task description: - Fail with explicit setup diagnostics when the corpus is empty or stale instead of producing misleading UI failures. Completion criteria: -- [ ] The live suite checks rebuild/readiness before suggestion assertions. -- [ ] Failure output distinguishes ingestion failure from UI failure. -- [ ] Setup docs reference compiled CLI and HTTP rebuild fallbacks. +- [x] The live suite checks rebuild/readiness before suggestion assertions. +- [x] Failure output distinguishes ingestion failure from UI failure. +- [x] Setup docs reference compiled CLI and HTTP rebuild fallbacks. ### QA-ZL-002 - Prove every surfaced suggestion succeeds -Status: TODO +Status: DONE Dependency: QA-ZL-001 Owners: Test Automation Task description: @@ -41,32 +41,37 @@ Task description: - Include pages that rely on current-scope weighting and overflow fallback. Completion criteria: -- [ ] The live suite iterates through each surfaced suggestion on the covered pages. -- [ ] Every rendered suggestion produces a visible non-dead-end state. -- [ ] Previously failing suggestion paths are covered explicitly. +- [x] The live suite iterates through each surfaced suggestion on the covered pages. +- [x] Every rendered suggestion produces a visible non-dead-end state. +- [x] Previously failing suggestion paths are covered explicitly. ### QA-ZL-003 - Verify search-to-chat consolidation -Status: TODO +Status: DONE Dependency: QA-ZL-002 Owners: Test Automation Task description: - Verify the compact chat launcher and answer-panel handoff preserve query, page context, and evidence after the search redesign. Completion criteria: -- [ ] Search is the tested primary entry in all covered flows. -- [ ] AdvisoryAI opens as a secondary deep-dive from search with inherited context. -- [ ] Execution log records the final full-pack commands and outcomes. +- [x] Search is the tested primary entry in all covered flows. +- [x] AdvisoryAI opens as a secondary deep-dive from search with inherited context. +- [x] Execution log records the final full-pack commands and outcomes. ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | | 2026-03-07 | Sprint created for live corpus-backed suggestion reliability and zero-learning search verification. | Project Manager | +| 2026-03-07 | Reproduced the user-facing failure against `http://127.1.0.44`: health was up but `POST /v1/advisory-ai/index/rebuild` returned `documentCount=0`, `chunkCount=0`, and `doctorProjectionCount=0`, so suggestion preflight now treats empty-corpus services as setup failures instead of UI regressions. | Test Automation | +| 2026-03-07 | Prepared sources against the repo-controlled service, rebuilt both indexes, and verified live query `database connectivity` returned `contextAnswer.status=grounded` with knowledge cards and citations. | Test Automation | +| 2026-03-07 | Ran `npx playwright test tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts --config playwright.config.ts` against `http://127.0.0.1:10451`; result `5/5` passed covering chip viability, every surfaced suggestion, result-open follow-up chips, and Ask-AdvisoryAI handoff. | Test Automation | ## Decisions & Risks - Decision: live reliability gates are required because static mocks cannot prove suggestion viability against real corpora. +- Decision: a healthy service with an empty corpus is an ingestion/setup failure, not a passing baseline; live E2E must fail before UI assertions in that case. - Risk: local environments may have partially ingested or empty corpora, especially in Doctor/knowledge projections. - Mitigation: add explicit corpus preflight and rebuild guidance so the suite fails with actionable diagnostics. +- Mitigation: use a repo-controlled local service (`http://127.0.0.1:10451`) with `advisoryai sources prepare`, `POST /v1/advisory-ai/index/rebuild`, and `POST /v1/search/index/rebuild` before running the live suite. ## Next Checkpoints -- 2026-03-09: Land live corpus preflight before broadening the suggestion matrix. -- 2026-03-10: Run the final live suggestion pack and capture exact outcomes in the execution log. +- 2026-03-09: Broaden live coverage beyond Doctor once findings/policy/VEX ingestion parity is available. +- 2026-03-10: Fold the live reliability lane into the consolidated zero-learning search redesign phases. diff --git a/docs/modules/advisory-ai/knowledge-search.md b/docs/modules/advisory-ai/knowledge-search.md index 90b3ad710..1f727c11e 100644 --- a/docs/modules/advisory-ai/knowledge-search.md +++ b/docs/modules/advisory-ai/knowledge-search.md @@ -403,7 +403,8 @@ Current live verification coverage: - Rebuild order exercised against a running local service: `POST /v1/advisory-ai/index/rebuild` then `POST /v1/search/index/rebuild` - Verified live query: `database connectivity` - Verified live outcome: response includes `contextAnswer.status = grounded`, citations, and entity cards over ingested data -- Verified live suggestion lane: the Doctor-page `database connectivity` chip remains a viable query after rebuild and is exercised by `src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts` +- Verified live suggestion lane: `src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts` now preflights corpus readiness, validates suggestion viability, executes every surfaced Doctor suggestion, asserts grounded-or-clarify answer states, verifies follow-up chips after result open, and verifies Ask-AdvisoryAI inherits the live query context +- Verified local corpus baseline on 2026-03-07 after `advisoryai sources prepare`: `documentCount = 470`, `chunkCount = 9050`, `apiOperationCount = 2190`, `doctorProjectionCount = 8` - Other routes still rely on deterministic mock-backed Playwright coverage until their ingestion parity is explicitly verified Or use the full CI testing stack: diff --git a/docs/modules/ui/search-zero-learning-primary-entry.md b/docs/modules/ui/search-zero-learning-primary-entry.md index 41cc5b86a..fd7e68571 100644 --- a/docs/modules/ui/search-zero-learning-primary-entry.md +++ b/docs/modules/ui/search-zero-learning-primary-entry.md @@ -61,6 +61,7 @@ - Knowledge/domain emptiness should be detectable so the UI can suppress invalid chips. - Empty-state contextual chips and page-owned common-question chips should preflight through the backend viability endpoint before they render. - Live Playwright coverage must assert that every surfaced suggestion returns visible results. +- A service health check alone is not enough. On 2026-03-07, `http://127.1.0.44/health` returned `200` while the live knowledge rebuild returned `documentCount=0`; the product still surfaced dead chips. Corpus readiness is the gate, not process liveness. ## Phase map - Phase 1: FE primary-entry consolidation and removal of explicit search controls. @@ -68,3 +69,4 @@ - Phase 3: FE consumption of overflow results and executable suggestion contracts. - Implemented on 2026-03-07: backend `contextAnswer` is now preferred over frontend heuristics, overflow renders as a secondary result section, and suggestion viability preflight suppresses dead chips before they are shown. - Phase 4: Live Playwright reliability matrix with corpus preflight and chip-success guarantees. + - Implemented on 2026-03-07: the live suite now rebuilds the active corpus, fails fast on empty knowledge projections, iterates every surfaced Doctor suggestion, and verifies Ask-AdvisoryAI inherits the live search context. diff --git a/src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts b/src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts index 12f3be762..ffd2af2b1 100644 --- a/src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts +++ b/src/Web/StellaOps.Web/tests/e2e/unified-search-contextual-suggestions.live.e2e.spec.ts @@ -7,6 +7,7 @@ const liveSearchBaseUrl = process.env['LIVE_ADVISORYAI_SEARCH_BASE_URL']?.trim() const liveTenant = process.env['LIVE_ADVISORYAI_TENANT']?.trim() || 'test-tenant'; const liveScopes = process.env['LIVE_ADVISORYAI_SCOPES']?.trim() || 'advisory-ai:view advisory-ai:operate advisory-ai:admin'; +const liveSuggestionSeedQueries = ['database connectivity', 'OIDC readiness']; const mockConfig = { authority: { @@ -112,13 +113,14 @@ test.describe('Unified Search - Live contextual suggestions', () => { test.beforeAll(async () => { await ensureLiveServiceHealthy(liveSearchBaseUrl); await rebuildLiveIndexes(liveSearchBaseUrl); + await assertLiveSuggestionCoverage(liveSearchBaseUrl, liveSuggestionSeedQueries); }); test.beforeEach(async ({ page }) => { await setupDoctorPage(page); }); - test('shows automatic suggestion chips when the doctor page opens', async ({ page }) => { + test('shows only viable live suggestion chips when the doctor page opens', async ({ page }) => { await routeLiveUnifiedSearch(page); await openDoctor(page); @@ -138,6 +140,34 @@ test.describe('Unified Search - Live contextual suggestions', () => { }).first()).toBeVisible(); }); + test('every surfaced doctor suggestion executes into a grounded or clarify state', async ({ page }) => { + await routeLiveUnifiedSearch(page); + await openDoctor(page); + + const searchInput = page.locator('app-global-search input[type="text"]'); + await searchInput.focus(); + await waitForResults(page); + + const suggestionTexts = (await page.locator('.search__suggestions .search__chip').allTextContents()) + .map((text) => text.trim()) + .filter((text) => text.length > 0); + + expect(suggestionTexts.length).toBeGreaterThan(0); + + for (const suggestionText of suggestionTexts) { + await openDoctor(page); + await searchInput.focus(); + await waitForResults(page); + await page.locator('.search__suggestions .search__chip', { + hasText: new RegExp(`^${escapeRegExp(suggestionText)}$`, 'i'), + }).first().click(); + + await expect(searchInput).toHaveValue(suggestionText); + await waitForResults(page); + await assertNonDeadEndSearch(page, suggestionText); + } + }); + test('clicking a suggestion chip executes a live query and shows a grounded answer', async ({ page }) => { const capturedRequests: Array> = []; await routeLiveUnifiedSearch(page, capturedRequests); @@ -196,6 +226,31 @@ test.describe('Unified Search - Live contextual suggestions', () => { hasText: /follow up:\s*database connectivity/i, }).first()).toBeVisible(); }); + + test('answer-panel Ask AdvisoryAI keeps the live query context', async ({ page }) => { + const capturedTurnBodies: Array> = []; + await routeLiveUnifiedSearch(page); + await mockChatConversation(page, capturedTurnBodies); + await openDoctor(page); + + const searchInput = page.locator('app-global-search input[type="text"]'); + await searchInput.focus(); + await waitForResults(page); + await page.locator('.search__suggestions .search__chip', { + hasText: /database connectivity/i, + }).first().click(); + + await expect(searchInput).toHaveValue('database connectivity'); + await waitForResults(page); + await expect(page.locator('[data-answer-status="grounded"]')).toBeVisible(); + + await page.locator('[data-answer-action="ask-ai"]').click(); + + await expect(page.locator('.assistant-drawer')).toBeVisible({ timeout: 10_000 }); + await expect.poll(() => capturedTurnBodies.length).toBeGreaterThan(0); + expect(String(capturedTurnBodies.at(-1)?.['content'] ?? '')).toMatch(/database connectivity/i); + expect(String(capturedTurnBodies.at(-1)?.['content'] ?? '')).toMatch(/grounded answer|best next step/i); + }); }); async function setupDoctorPage(page: Page): Promise { @@ -311,6 +366,16 @@ async function routeLiveUnifiedSearch( body, }); }); + + await page.route('**/api/v1/search/suggestions/evaluate', async (route) => { + const rawBody = route.request().postData() ?? '{}'; + const body = await fetchLiveSuggestionViability(rawBody); + await route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify(body), + }); + }); } async function ensureLiveServiceHealthy(baseUrl: string): Promise { @@ -345,6 +410,26 @@ async function rebuildLiveIndexes(baseUrl: string): Promise { } } +async function assertLiveSuggestionCoverage( + baseUrl: string, + queries: readonly string[], +): Promise { + const payload = await fetchLiveSuggestionViability(JSON.stringify({ + queries, + ambient: { + currentRoute: '/ops/operations/doctor', + }, + })); + const suggestions = Array.isArray(payload['suggestions']) + ? payload['suggestions'] as Array> + : []; + const viableSuggestions = suggestions.filter((suggestion) => suggestion['viable'] === true); + + if (viableSuggestions.length === 0) { + throw new Error(`Live suggestion preflight returned no viable queries: ${JSON.stringify(payload)}`); + } +} + function safeParseRequest(rawBody: string): Record { try { const parsed = JSON.parse(rawBody) as Record; @@ -353,3 +438,218 @@ function safeParseRequest(rawBody: string): Record { return {}; } } + +async function fetchLiveSuggestionViability(rawBody: string): Promise> { + const headers = { + 'content-type': 'application/json', + 'x-stellaops-scopes': liveScopes, + 'x-stellaops-tenant': liveTenant, + 'x-stellaops-actor': 'playwright-live', + }; + + const directResponse = await fetch(`${liveSearchBaseUrl}/v1/search/suggestions/evaluate`, { + method: 'POST', + headers, + body: rawBody, + }); + + if (directResponse.ok) { + return safeParseRequest(await directResponse.text()); + } + + if (directResponse.status !== 404) { + throw new Error(`Live suggestion preflight failed with status ${directResponse.status}.`); + } + + const parsedBody = safeParseRequest(rawBody); + return buildCompatibilitySuggestionViability(parsedBody, headers); +} + +async function buildCompatibilitySuggestionViability( + requestBody: Record, + headers: Record, +): Promise> { + const queries = Array.isArray(requestBody['queries']) + ? requestBody['queries'].map((query) => String(query ?? '').trim()).filter((query) => query.length > 0) + : []; + const filters = requestBody['filters']; + const ambient = requestBody['ambient']; + const suggestions: Array> = []; + let mergedCoverage: Record | null = null; + + for (const query of queries) { + const response = await fetch(`${liveSearchBaseUrl}/v1/search/query`, { + method: 'POST', + headers, + body: JSON.stringify({ + q: query, + k: 5, + includeSynthesis: false, + filters, + ambient, + }), + }); + + if (!response.ok) { + throw new Error(`Compatibility suggestion query failed for "${query}" with status ${response.status}.`); + } + + const payload = safeParseRequest(await response.text()); + const cards = Array.isArray(payload['cards']) ? payload['cards'] as Array> : []; + const overflow = payload['overflow'] && typeof payload['overflow'] === 'object' + ? payload['overflow'] as Record + : null; + const overflowCards = Array.isArray(overflow?.['cards']) ? overflow!['cards'] as Array> : []; + const contextAnswer = payload['contextAnswer'] && typeof payload['contextAnswer'] === 'object' + ? payload['contextAnswer'] as Record + : null; + const coverage = payload['coverage'] && typeof payload['coverage'] === 'object' + ? payload['coverage'] as Record + : null; + const cardCount = cards.length + overflowCards.length; + const status = String(contextAnswer?.['status'] ?? 'insufficient'); + const leadingDomain = + String(cards[0]?.['domain'] ?? overflowCards[0]?.['domain'] ?? coverage?.['currentScopeDomain'] ?? ''); + + suggestions.push({ + query, + viable: cardCount > 0 || status === 'clarify', + status, + code: String(contextAnswer?.['code'] ?? 'no_grounded_evidence'), + cardCount, + leadingDomain: leadingDomain || undefined, + reason: String(contextAnswer?.['reason'] ?? 'No grounded evidence matched the suggestion in the active corpus.'), + }); + + mergedCoverage = mergeCoverage(mergedCoverage, coverage); + } + + return { + suggestions, + coverage: mergedCoverage, + }; +} + +function mergeCoverage( + current: Record | null, + next: Record | null, +): Record | null { + if (!next) { + return current; + } + + if (!current) { + return next; + } + + const currentDomains = Array.isArray(current['domains']) ? current['domains'] as Array> : []; + const nextDomains = Array.isArray(next['domains']) ? next['domains'] as Array> : []; + const mergedDomainsByKey = new Map>(); + + for (const domain of [...currentDomains, ...nextDomains]) { + const key = String(domain['domain'] ?? ''); + if (!key) { + continue; + } + + const existing = mergedDomainsByKey.get(key); + if (!existing) { + mergedDomainsByKey.set(key, domain); + continue; + } + + mergedDomainsByKey.set(key, { + domain: key, + candidateCount: Math.max(Number(existing['candidateCount'] ?? 0), Number(domain['candidateCount'] ?? 0)), + visibleCardCount: Math.max(Number(existing['visibleCardCount'] ?? 0), Number(domain['visibleCardCount'] ?? 0)), + topScore: Math.max(Number(existing['topScore'] ?? 0), Number(domain['topScore'] ?? 0)), + isCurrentScope: Boolean(existing['isCurrentScope']) || Boolean(domain['isCurrentScope']), + hasVisibleResults: Boolean(existing['hasVisibleResults']) || Boolean(domain['hasVisibleResults']), + }); + } + + return { + currentScopeDomain: String(current['currentScopeDomain'] ?? next['currentScopeDomain'] ?? ''), + currentScopeWeighted: Boolean(current['currentScopeWeighted']) || Boolean(next['currentScopeWeighted']), + domains: Array.from(mergedDomainsByKey.values()), + }; +} + +async function assertNonDeadEndSearch(page: Page, suggestionText: string): Promise { + await expect.poll(async () => { + const status = await page.locator('[data-answer-status]').first().getAttribute('data-answer-status'); + if (status === 'grounded' || status === 'clarify') { + return status; + } + + return ''; + }, { + message: `Expected "${suggestionText}" to resolve into a grounded or clarify answer.`, + }).not.toBe(''); + + const answerStatus = await page.locator('[data-answer-status]').first().getAttribute('data-answer-status'); + if (answerStatus === 'grounded') { + await waitForEntityCards(page, 1); + } +} + +async function mockChatConversation( + page: Page, + capturedTurnBodies: Array>, +): Promise { + await page.route('**/api/v1/advisory-ai/conversations', async (route) => { + if (route.request().method() !== 'POST') { + return route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify([]), + }); + } + + return route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + conversationId: 'conv-live-context-1', + tenantId: liveTenant, + userId: 'playwright-live', + context: {}, + turns: [], + createdAt: '2026-03-07T00:00:00.000Z', + updatedAt: '2026-03-07T00:00:00.000Z', + }), + }); + }); + + await page.route('**/api/v1/advisory-ai/conversations/*/turns', async (route) => { + if (route.request().method() !== 'POST') { + return route.continue(); + } + + capturedTurnBodies.push((route.request().postDataJSON() as Record | null) ?? {}); + const events = [ + 'event: progress', + 'data: {"stage":"searching"}', + '', + 'event: token', + 'data: {"content":"I can expand the grounded answer and recommend the next step."}', + '', + 'event: done', + 'data: {"turnId":"turn-live-context-1","groundingScore":0.93}', + '', + ].join('\n'); + + return route.fulfill({ + status: 200, + headers: { + 'content-type': 'text/event-stream; charset=utf-8', + 'cache-control': 'no-cache', + }, + body: events, + }); + }); +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +}