diff --git a/EXPLORER_STATE.md b/EXPLORER_STATE.md index ae43fc0..21c8415 100644 --- a/EXPLORER_STATE.md +++ b/EXPLORER_STATE.md @@ -293,6 +293,41 @@ that investigation recommends switching the backend (e.g., from in-browser ILIKE → static-Parquet inverted index → hosted-search service), (C) is compatible with all of them. +### Light-path addendum: two-button scope selection ([#178](https://github.com/isamplesorg/isamplesorg.github.io/issues/178), 2026-05-08) + +Hana's mockup ([Figma 213:394](https://www.figma.com/design/Nqkuqh3Z4aqVh0nmwUAgKg/iSamples-Wireframe-1.0?node-id=213-394)) +proposed a two-button search UI: "Search Selected Areas" (viewport-scoped) +and "Search Entire World" (full-corpus). Implemented as a Light extension +of (C), not a revisit of the A/B/C decision: + +- "Search Entire World" runs the existing (C) full-corpus side-panel + lookup with result-pin overlay. Behavior unchanged from the contract above. + SQL shape: CTE over `sample_facets_v2` → top-50 → `LEFT JOIN` to + `samples_map_lite` for display coords (samples without coords still + appear; lat/lng are null). +- "Search Selected Areas" runs the same text predicate but with a + different SQL shape: `INNER JOIN` `samples_map_lite` inside the + candidate selection, viewport `BETWEEN` predicate applied **before** + `ORDER BY ... LIMIT 50`. This is critical — applying viewport after + the global top-50 produces false zeroes (the global top-50 is + concentrated in a few hot regions; a Sudan-area `pottery` query + would return zero even though Sudan has plenty of pottery hits). + Dateline-crossing is split into two longitude ranges. +- URL state gains `?search_scope=area|world`; default `world`, omitted + from URL when default. Hydrated on boot; written by `persistSearchScope()`. +- Result-pin overlay still applies in both modes — pin coordinates + reflect what was found, viewport-scope just narrows the candidate set. +- Auto-fly to the first result is suppressed in area mode (the user is + already at the area they care about; flying would zoom in and disorient). +- Area mode requires coordinates by definition, so the `INNER JOIN` + drops samples that have facets but no `samples_map_lite` row. World + mode keeps them (via `LEFT JOIN`) since coord-less samples are still + legitimate text matches. + +A future Heavy revisit may rethink (A) global-filter semantics if usage +data shows users *expect* the map and facets to update with search. +That decision is deferred until #170-#172 land. + --- ## 7. Facet-count contract diff --git a/explorer.qmd b/explorer.qmd index 602131d..c73d1de 100644 --- a/explorer.qmd +++ b/explorer.qmd @@ -93,17 +93,24 @@ format: } .share-btn:hover { background: #0d47a1; } .share-toast { font-size: 12px; color: #2e7d32; opacity: 0; transition: opacity 0.3s; } - .search-bar { display: flex; gap: 6px; margin-bottom: 12px; } + .search-bar { display: flex; gap: 6px; margin-bottom: 6px; } .search-bar input { flex: 1; padding: 8px 12px; border: 1px solid #ccc; border-radius: 4px; font-size: 14px; outline: none; } .search-bar input:focus { border-color: #1565c0; box-shadow: 0 0 0 2px rgba(21,101,192,0.15); } - .search-bar button { - background: #1565c0; color: white; border: none; padding: 8px 16px; - border-radius: 4px; cursor: pointer; font-size: 14px; white-space: nowrap; + .search-actions { + display: flex; gap: 6px; margin-bottom: 8px; } - .search-bar button:hover { background: #0d47a1; } + .search-actions button { + flex: 1; border: none; padding: 8px 12px; border-radius: 4px; + cursor: pointer; font-size: 13px; font-weight: 500; color: white; + white-space: nowrap; + } + .search-actions #searchAreaBtn { background: #ef6c00; } + .search-actions #searchAreaBtn:hover { background: #e65100; } + .search-actions #searchWorldBtn { background: #1565c0; } + .search-actions #searchWorldBtn:hover { background: #0d47a1; } .search-results { font-size: 12px; color: #666; padding: 4px 0; } .view-toolbar { display: flex; @@ -240,7 +247,10 @@ Circle size = log(sample count). Color = dominant data source. +
+ +
Searches labels, descriptions, and place names. First search can take 10-15 seconds while data loads; subsequent searches are faster. @@ -1817,21 +1827,47 @@ zoomWatcher = { } // --- Search handler --- - const searchBtn = document.getElementById('searchBtn'); + const searchAreaBtn = document.getElementById('searchAreaBtn'); + const searchWorldBtn = document.getElementById('searchWorldBtn'); const searchInput = document.getElementById('sampleSearch'); const searchResults = document.getElementById('searchResults'); let _searchSeq = 0; + // Initial scope hydrated from URL; default 'world' on missing/unknown. + let _searchScope = ( + new URLSearchParams(location.search).get('search_scope') === 'area' + ) ? 'area' : 'world'; + + function persistSearchScope(scope) { + // writeQueryState() doesn't know about scope; keep the URL param + // honest by manipulating directly. 'world' is default, omitted from + // URL. + const params = new URLSearchParams(location.search); + if (scope === 'area') params.set('search_scope', 'area'); + else params.delete('search_scope'); + const qs = params.toString(); + const url = `${location.pathname}${qs ? `?${qs}` : ''}${location.hash}`; + if (url !== `${location.pathname}${location.search}${location.hash}`) { + history.replaceState(null, '', url); + } + } + + async function doSearch(scope) { + if (scope === 'area' || scope === 'world') _searchScope = scope; + const effectiveScope = _searchScope; - async function doSearch() { const term = searchInput.value.trim(); if (!term || term.length < 2) { searchResults.textContent = 'Type at least 2 characters'; writeQueryState(); + persistSearchScope(effectiveScope); return; } writeQueryState(); - searchResults.textContent = 'Searching...'; + persistSearchScope(effectiveScope); + searchResults.textContent = effectiveScope === 'area' + ? 'Searching selected areas...' + : 'Searching entire world...'; // Per-search perf instrumentation (#167). Captures cold/warm latency, // result count, and bytes transferred from data.isamples.org during @@ -1866,33 +1902,94 @@ zoomWatcher = { // DuckDB benchmark: naive 4.2 s vs CTE 0.5 s for `pottery`. // The browser DuckDB-WASM penalty makes the difference even // more pronounced; the naive form times out on `pottery` cold. + // Use `f.`-qualified columns so the same searchWhere/score + // strings work for both the world-mode CTE (single table aliased + // f) and the area-mode INNER JOIN (f + l, both via USING (pid)). const searchWhere = textSearchWhere(terms, [ - 'label', - 'description', - 'CAST(place_name AS VARCHAR)', + 'f.label', + 'f.description', + 'CAST(f.place_name AS VARCHAR)', ]); const score = textSearchScore(terms, [ - { col: 'label', weight: 3 }, - { col: 'description', weight: 1 }, - { col: 'CAST(place_name AS VARCHAR)', weight: 2 }, + { col: 'f.label', weight: 3 }, + { col: 'f.description', weight: 1 }, + { col: 'CAST(f.place_name AS VARCHAR)', weight: 2 }, ]); - const results = await db.query(` - WITH matches AS ( - SELECT pid, label, source, place_name, - (${score}) AS relevance_score - FROM read_parquet('${facets_url}') - WHERE ${searchWhere} - ${sourceFilterSQL('source')} - ${facetFilterSQL()} - ORDER BY relevance_score DESC - LIMIT 50 - ) - SELECT m.pid, m.label, m.source, l.latitude, l.longitude, - m.place_name, m.relevance_score - FROM matches m - LEFT JOIN read_parquet('${lite_url}') l USING (pid) - ORDER BY m.relevance_score DESC, m.label - `); + + // Two SQL shapes — one per scope. The fix per #179 round-2 + // review: in area mode, the viewport predicate MUST run BEFORE + // the top-50 selection, otherwise we're searching only the + // global top-50 within the area ("current viewport among the + // global top 50") rather than "top 50 within the current + // viewport." For broad terms like `pottery`, the global top-50 + // is concentrated in a few hot regions, so a Sudan-area query + // would return zero even though Sudan has plenty of pottery. + // + // World mode keeps the original CTE-then-LEFT-JOIN shape so + // samples that have facets but no `samples_map_lite` row + // (i.e., no coordinates) still appear in the results, with + // null lat/lng. The click-to-fly handler already guards on + // isNaN(lat). + // + // Area mode uses INNER JOIN inside the candidate selection + // because area-scoped search by definition requires + // coordinates. Drop coord-less samples before ranking; apply + // the viewport predicate; THEN top-50. + let results; + if (effectiveScope === 'area') { + const rect = viewer.camera.computeViewRectangle(viewer.scene.globe.ellipsoid); + if (!rect) { + // Camera couldn't produce a view rectangle (shouldn't + // happen in practice; defensive). Fall through to the + // world query so the user gets results, with a console + // hint for diagnostics. + console.warn('Area scope requested but no view rectangle; falling back to world.'); + results = await runWorldQuery(); + } else { + const south = Cesium.Math.toDegrees(rect.south); + const north = Cesium.Math.toDegrees(rect.north); + const west = Cesium.Math.toDegrees(rect.west); + const east = Cesium.Math.toDegrees(rect.east); + const lngClause = (west > east) + ? `(l.longitude BETWEEN ${west} AND 180 OR l.longitude BETWEEN -180 AND ${east})` + : `l.longitude BETWEEN ${west} AND ${east}`; + results = await db.query(` + SELECT f.pid, f.label, f.source, l.latitude, l.longitude, + f.place_name, (${score}) AS relevance_score + FROM read_parquet('${facets_url}') f + INNER JOIN read_parquet('${lite_url}') l USING (pid) + WHERE ${searchWhere} + AND l.latitude BETWEEN ${south} AND ${north} + AND ${lngClause} + ${sourceFilterSQL('f.source')} + ${facetFilterSQL()} + ORDER BY relevance_score DESC, f.label + LIMIT 50 + `); + } + } else { + results = await runWorldQuery(); + } + + async function runWorldQuery() { + return db.query(` + WITH matches AS ( + SELECT f.pid, f.label, f.source, f.place_name, + (${score}) AS relevance_score + FROM read_parquet('${facets_url}') f + WHERE ${searchWhere} + ${sourceFilterSQL('f.source')} + ${facetFilterSQL()} + ORDER BY relevance_score DESC + LIMIT 50 + ) + SELECT m.pid, m.label, m.source, l.latitude, l.longitude, + m.place_name, m.relevance_score + FROM matches m + LEFT JOIN read_parquet('${lite_url}') l USING (pid) + ORDER BY m.relevance_score DESC, m.label + `); + } resultsCount = results.length; if (results.length === 0) { searchResults.textContent = `No results for "${term}"`; @@ -1934,8 +2031,10 @@ zoomWatcher = { }); } - // Fly to the first result - if (results[0].latitude && results[0].longitude) { + // Fly to the first result. Skip for area-scoped searches — + // the user is already at the area they care about; flying + // would zoom in and disorient. + if (effectiveScope === 'world' && results[0].latitude && results[0].longitude) { viewer.camera.flyTo({ destination: Cesium.Cartesian3.fromDegrees(results[0].longitude, results[0].latitude, 200000), duration: 1.5 @@ -1980,6 +2079,7 @@ zoomWatcher = { id: searchId, term: term, terms_count: terms.length, + scope: effectiveScope, results_count: resultsCount, elapsed_ms: Math.round(elapsedMs), bytes_transfer: transferBytes, @@ -2006,7 +2106,7 @@ zoomWatcher = { const tr = document.createElement('tr'); const labelCell = document.createElement('td'); labelCell.style.cssText = 'padding:1px 8px 1px 0;color:#bbb;'; - labelCell.textContent = `search #${searchId}: "${term}" (${resultsCount})`; + labelCell.textContent = `search #${searchId} ${effectiveScope}: "${term}" (${resultsCount})`; const valCell = document.createElement('td'); valCell.style.cssText = 'padding:1px 0;text-align:right;color:#a5d6a7;font-variant-numeric:tabular-nums;'; valCell.textContent = fmt(elapsedMs); @@ -2019,13 +2119,17 @@ zoomWatcher = { } } - if (searchBtn) searchBtn.addEventListener('click', doSearch); + if (searchAreaBtn) searchAreaBtn.addEventListener('click', () => doSearch('area')); + if (searchWorldBtn) searchWorldBtn.addEventListener('click', () => doSearch('world')); + // Enter key uses the last-clicked scope (or the URL-hydrated scope if + // no button has been clicked yet). Defaults to 'world' for keyboard-only + // users on first invocation. if (searchInput) searchInput.addEventListener('keydown', (e) => { - if (e.key === 'Enter') doSearch(); + if (e.key === 'Enter') doSearch(_searchScope); }); if (searchInput && searchInput.value.trim().length >= 2) { - doSearch(); + doSearch(_searchScope); } refreshFacetCounts(); diff --git a/tests/search_baseline_2026-05-08.json b/tests/search_baseline_2026-05-08.json index c06be25..e339d01 100644 --- a/tests/search_baseline_2026-05-08.json +++ b/tests/search_baseline_2026-05-08.json @@ -1,8 +1,8 @@ { - "site_url": "http://localhost:5860", - "captured_at_utc": "2026-05-08T14:47:22.699069+00:00", + "site_url": "http://localhost:5880", + "captured_at_utc": "2026-05-08T21:50:25.268044+00:00", "schema_version": 1, - "field_subset": "label+place_name (samples_map_lite.parquet)", + "field_subset": "label+description+place_name (sample_facets_v2 + lite for coords; world via LEFT JOIN, area via INNER JOIN with viewport predicate inside CTE)", "queries": [ { "label": "single-common", @@ -13,8 +13,9 @@ "id": 1, "term": "pottery", "terms_count": 1, + "scope": "world", "results_count": 50, - "elapsed_ms": 8683, + "elapsed_ms": 10534, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -27,8 +28,9 @@ "id": 2, "term": "pottery", "terms_count": 1, + "scope": "world", "results_count": 50, - "elapsed_ms": 1940, + "elapsed_ms": 4566, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -46,8 +48,9 @@ "id": 1, "term": "basalt", "terms_count": 1, + "scope": "world", "results_count": 50, - "elapsed_ms": 11824, + "elapsed_ms": 9744, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -60,8 +63,9 @@ "id": 2, "term": "basalt", "terms_count": 1, + "scope": "world", "results_count": 50, - "elapsed_ms": 2400, + "elapsed_ms": 4493, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -79,8 +83,9 @@ "id": 1, "term": "pottery Cyprus", "terms_count": 2, - "results_count": 0, - "elapsed_ms": 5071, + "scope": "world", + "results_count": 50, + "elapsed_ms": 10013, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -93,8 +98,9 @@ "id": 2, "term": "pottery Cyprus", "terms_count": 2, - "results_count": 0, - "elapsed_ms": 2560, + "scope": "world", + "results_count": 50, + "elapsed_ms": 4515, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -112,8 +118,9 @@ "id": 1, "term": "xyzzyqqqplugh", "terms_count": 1, + "scope": "world", "results_count": 0, - "elapsed_ms": 4870, + "elapsed_ms": 9840, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -126,8 +133,9 @@ "id": 2, "term": "xyzzyqqqplugh", "terms_count": 1, + "scope": "world", "results_count": 0, - "elapsed_ms": 2554, + "elapsed_ms": 4644, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -145,8 +153,9 @@ "id": 1, "term": "100%", "terms_count": 1, - "results_count": 0, - "elapsed_ms": 5178, + "scope": "world", + "results_count": 50, + "elapsed_ms": 12216, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -159,8 +168,9 @@ "id": 2, "term": "100%", "terms_count": 1, - "results_count": 0, - "elapsed_ms": 2574, + "scope": "world", + "results_count": 50, + "elapsed_ms": 4362, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -178,8 +188,9 @@ "id": 1, "term": "_test", "terms_count": 1, + "scope": "world", "results_count": 0, - "elapsed_ms": 18520, + "elapsed_ms": 10134, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -192,8 +203,9 @@ "id": 2, "term": "_test", "terms_count": 1, + "scope": "world", "results_count": 0, - "elapsed_ms": 2529, + "elapsed_ms": 4575, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -211,8 +223,9 @@ "id": 1, "term": "\u00c7atalh\u00f6y\u00fck", "terms_count": 1, - "results_count": 0, - "elapsed_ms": 4415, + "scope": "world", + "results_count": 50, + "elapsed_ms": 13237, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -225,8 +238,9 @@ "id": 2, "term": "\u00c7atalh\u00f6y\u00fck", "terms_count": 1, - "results_count": 0, - "elapsed_ms": 2592, + "scope": "world", + "results_count": 50, + "elapsed_ms": 4878, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -248,8 +262,9 @@ "id": 1, "term": "pottery", "terms_count": 1, + "scope": "world", "results_count": 50, - "elapsed_ms": 4964, + "elapsed_ms": 6072, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -262,8 +277,9 @@ "id": 2, "term": "pottery", "terms_count": 1, + "scope": "world", "results_count": 50, - "elapsed_ms": 1368, + "elapsed_ms": 5486, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -286,8 +302,9 @@ "id": 1, "term": "pottery", "terms_count": 1, + "scope": "world", "results_count": 0, - "elapsed_ms": 4331, + "elapsed_ms": 6582, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -300,8 +317,9 @@ "id": 2, "term": "pottery", "terms_count": 1, + "scope": "world", "results_count": 0, - "elapsed_ms": 2106, + "elapsed_ms": 3427, "bytes_transfer": 0, "bytes_body": 0, "seen_urls": [], @@ -309,6 +327,43 @@ "has_facet_filter": true, "error": null } + }, + { + "label": "area-scope", + "term": "pottery", + "filters": { + "scope": "area" + }, + "cold": { + "event": "isamples.search", + "id": 1, + "term": "pottery", + "terms_count": 1, + "scope": "area", + "results_count": 50, + "elapsed_ms": 10483, + "bytes_transfer": 0, + "bytes_body": 0, + "seen_urls": [], + "has_source_filter": false, + "has_facet_filter": false, + "error": null + }, + "warm": { + "event": "isamples.search", + "id": 2, + "term": "pottery", + "terms_count": 1, + "scope": "area", + "results_count": 50, + "elapsed_ms": 4165, + "bytes_transfer": 0, + "bytes_body": 0, + "seen_urls": [], + "has_source_filter": false, + "has_facet_filter": false, + "error": null + } } ] -} +} \ No newline at end of file diff --git a/tests/test_search_perf.py b/tests/test_search_perf.py index ce42b88..9257846 100644 --- a/tests/test_search_perf.py +++ b/tests/test_search_perf.py @@ -69,6 +69,16 @@ "material_first_n": 1, }, }, + { + # Viewport-scoped search per #178 Light path. Camera position is + # set via the URL hash (Mediterranean / Cyprus area) so the + # area-scope predicate has a meaningful rect. Clicks the + # "Search Selected Areas" button instead of "Search Entire World". + "label": "area-scope", + "term": "pottery", + "filters": {"scope": "area"}, + "url_hash": "v=1&lat=35&lng=33&alt=2000000", + }, ] @@ -156,15 +166,23 @@ def _apply_material_first_n(page, n: int) -> None: _wait_for_facet_settle(page) -def _run_search(page, term: str, *, captured: list, expected_id_after: int) -> dict: - """Type term, click search, wait for the corresponding console event.""" +def _run_search( + page, + term: str, + *, + captured: list, + expected_id_after: int, + scope: str = "world", +) -> dict: + """Type term, click the appropriate scope button, wait for the console event.""" search_input = page.locator("#sampleSearch") search_input.click() # Clear via select-all + delete (faster + works around platform shortcuts). search_input.press("ControlOrMeta+a") search_input.press("Delete") search_input.fill(term) - page.locator("#searchBtn").click() + button_id = "#searchAreaBtn" if scope == "area" else "#searchWorldBtn" + page.locator(button_id).click() # Wait for an isamples.search log whose id is strictly greater than the # last one we observed. Polling is simpler than promise-based waits here. @@ -207,7 +225,12 @@ def _measure_one_query(browser, query: dict) -> dict: page = context.new_page() captured: list = [] _collect_search_logs(page, captured) - page.goto(EXPLORER_URL, wait_until="domcontentloaded", timeout=60_000) + + # Optional URL hash for area-scope cases (#178) — sets the camera + # before the search runs so the area predicate has a meaningful rect. + url_hash = query.get("url_hash") + target_url = EXPLORER_URL + (f"#{url_hash}" if url_hash else "") + page.goto(target_url, wait_until="domcontentloaded", timeout=60_000) _wait_for_explorer_ready(page) filters = query["filters"] @@ -215,12 +238,15 @@ def _measure_one_query(browser, query: dict) -> dict: _apply_source_filter(page, filters["source_only"]) if "material_first_n" in filters: _apply_material_first_n(page, filters["material_first_n"]) + scope = filters.get("scope", "world") cold = _run_search( - page, query["term"], captured=captured, expected_id_after=0 + page, query["term"], captured=captured, expected_id_after=0, + scope=scope, ) warm = _run_search( - page, query["term"], captured=captured, expected_id_after=cold["id"] + page, query["term"], captured=captured, expected_id_after=cold["id"], + scope=scope, ) finally: context.close() @@ -273,7 +299,7 @@ def test_record_search_baseline(browser, benchmark_run_started_at, baseline_outp "site_url": SITE_URL, "captured_at_utc": benchmark_run_started_at.isoformat(), "schema_version": 1, - "field_subset": "label+place_name (samples_map_lite.parquet)", + "field_subset": "label+description+place_name (sample_facets_v2 + lite for coords; world via LEFT JOIN, area via INNER JOIN with viewport predicate inside CTE)", "queries": results, } baseline_output_path.write_text(json.dumps(payload, indent=2) + "\n")