From eb4c38e35dc48035e92f83d7196b5925bf207773 Mon Sep 17 00:00:00 2001 From: shuchenliu Date: Fri, 8 May 2026 14:22:51 -0700 Subject: [PATCH 1/3] Return lookup results as arrays --- src/nameres/handlers/base.py | 6 ++++ src/nameres/handlers/lookup.py | 54 ++++++++++++++++------------------ test/test_api_contract.py | 12 ++++---- 3 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/nameres/handlers/base.py b/src/nameres/handlers/base.py index df0dfb4..9755011 100644 --- a/src/nameres/handlers/base.py +++ b/src/nameres/handlers/base.py @@ -1,5 +1,7 @@ """Shared handler behavior for NameResolution API endpoints.""" +import json + from biothings.web.handlers import BaseHandler @@ -25,3 +27,7 @@ def set_default_headers(self): def options(self, *args, **kwargs): self.finish() + + def finish_json(self, response: dict | list) -> None: + self.set_header("Content-Type", "application/json; charset=UTF-8") + super().finish(json.dumps(response)) diff --git a/src/nameres/handlers/lookup.py b/src/nameres/handlers/lookup.py index 87507c6..9a01400 100644 --- a/src/nameres/handlers/lookup.py +++ b/src/nameres/handlers/lookup.py @@ -4,10 +4,9 @@ Converted from SOLR -> Elasticsearch """ -import collections import dataclasses -import logging import json +import logging import re from typing import Optional @@ -338,7 +337,7 @@ async def get(self): lookup_result = await lookup(self.biothings, self.lookup_queries[0], self.filters) except Exception as gen_exc: raise HTTPError(detail="Error occurred during processing.", status_code=500) from gen_exc - self.finish(lookup_result) + self.finish_json(lookup_result) async def post(self): """Returns cliques with a name or synonym that contains a specified string.""" @@ -346,7 +345,7 @@ async def post(self): lookup_result = await lookup(self.biothings, self.lookup_queries[0], self.filters) except Exception as gen_exc: raise HTTPError(detail="Error occurred during processing.", status_code=500) from gen_exc - self.finish(lookup_result) + self.finish_json(lookup_result) class NameResolutionBulkLookupHandler(BaseNameResolutionLookupHandler): @@ -359,13 +358,13 @@ class NameResolutionBulkLookupHandler(BaseNameResolutionLookupHandler): name = "bulk-lookup" - async def post(self) -> dict[str, collections.OrderedDict]: + async def post(self) -> dict[str, list[dict]]: """Returns cliques with a name or synonym that contains a specified string sent via batch.""" try: lookup_results = {} for lookup_query in self.lookup_queries: - lookup_result: collections.OrderedDict = await lookup(self.biothings, lookup_query, self.filters) + lookup_result: list[dict] = await lookup(self.biothings, lookup_query, self.filters) lookup_key: str = lookup_query.string.pop() lookup_results[lookup_key] = lookup_result except Exception as gen_exc: @@ -375,7 +374,7 @@ async def post(self) -> dict[str, collections.OrderedDict]: async def lookup( biothings_metadata: NameResolutionAPINamespace, lookup_query: list[LookupQuery], filters: dict -) -> collections.OrderedDict: +) -> list[dict]: """Returns cliques with a name or synonym that contains a specified string.""" elasticsearch_query = _build_elasticsearch_query(lookup_query, filters) @@ -404,10 +403,7 @@ async def lookup( } lookup_response = await biothings_metadata.elasticsearch.async_client.search(**search_parameters) - # https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.write - # We have to change the API slightly here due to security requirements around returning - # list-objects as the API response - outputs = collections.OrderedDict() + outputs = [] for doc in lookup_response["hits"]["hits"]: preferred_matches = [] synonym_matches = [] @@ -419,23 +415,25 @@ async def lookup( source = doc["_source"] curie_identifier = source.get("curie", "") - outputs[curie_identifier] = dataclasses.asdict( - LookupResult( - curie=curie_identifier, - label=source.get("preferred_name", ""), - highlighting=( - { - "labels": preferred_matches, - "synonyms": synonym_matches, - } - if lookup_query.highlighting - else {} - ), - synonyms=source.get("names", []), - score=doc.get("_score", ""), - taxa=source.get("taxa", []), - clique_identifier_count=source.get("clique_identifier_count", 0), - types=[f"biolink:{d}" for d in source.get("biolink_types", [])], + outputs.append( + dataclasses.asdict( + LookupResult( + curie=curie_identifier, + label=source.get("preferred_name", ""), + highlighting=( + { + "labels": preferred_matches, + "synonyms": synonym_matches, + } + if lookup_query.highlighting + else {} + ), + synonyms=source.get("names", []), + score=doc.get("_score", ""), + taxa=source.get("taxa", []), + clique_identifier_count=source.get("clique_identifier_count", 0), + types=[f"biolink:{d}" for d in source.get("biolink_types", [])], + ) ) ) diff --git a/test/test_api_contract.py b/test/test_api_contract.py index 7c7e281..b5471b9 100644 --- a/test/test_api_contract.py +++ b/test/test_api_contract.py @@ -124,10 +124,10 @@ def test_lookup_get_returns_result_shape(nameres_server): status, _, body = _request_json(nameres_server, "/lookup?string=aspirin&limit=1") assert status == 200 - assert isinstance(body, dict) + assert isinstance(body, list) assert body - first_result = next(iter(body.values())) + first_result = body[0] assert first_result["curie"] assert first_result["label"] assert isinstance(first_result["synonyms"], list) @@ -149,7 +149,7 @@ def test_lookup_accepts_issue_8_query_shape(nameres_server): status, _, body = _request_json(nameres_server, f"/lookup?{query}") assert status == 200 - assert isinstance(body, dict) + assert isinstance(body, list) def test_bulk_lookup_post_returns_results_by_input_string(nameres_server): @@ -162,14 +162,14 @@ def test_bulk_lookup_post_returns_results_by_input_string(nameres_server): assert status == 200 assert set(body) == {"aspirin", "diabetes"} - assert isinstance(body["aspirin"], dict) - assert isinstance(body["diabetes"], dict) + assert isinstance(body["aspirin"], list) + assert isinstance(body["diabetes"], list) def test_synonyms_post_returns_known_and_missing_curies(nameres_server): lookup_status, _, lookup_body = _request_json(nameres_server, "/lookup?string=aspirin&limit=1") assert lookup_status == 200 - known_curie = next(iter(lookup_body)) + known_curie = lookup_body[0]["curie"] status, _, body = _request_json( nameres_server, From 4604350abcb759505c0bf4b74fd60d07e206e82d Mon Sep 17 00:00:00 2001 From: shuchenliu Date: Fri, 8 May 2026 14:40:17 -0700 Subject: [PATCH 2/3] Address lookup review comments --- src/nameres/handlers/lookup.py | 30 ++++++++++++++++++------------ test/test_api_contract.py | 6 +++--- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/nameres/handlers/lookup.py b/src/nameres/handlers/lookup.py index 9a01400..ba2ec61 100644 --- a/src/nameres/handlers/lookup.py +++ b/src/nameres/handlers/lookup.py @@ -26,7 +26,8 @@ class LookupArgumentException(Exception): @dataclasses.dataclass() class LookupQuery: - string: str + raw_string: str + query_strings: tuple[str, ...] autocomplete: Optional[bool] highlighting: Optional[bool] offset: Optional[int] @@ -145,9 +146,10 @@ def parse_boolean(argument: str | bool) -> bool: raise LookupArgumentException(lookup_message) self.lookup_queries = [] - for search_string in sanitized_lookup_strings: + for raw_string, query_strings in sanitized_lookup_strings: lookup_query = LookupQuery( - string=search_string, + raw_string=raw_string, + query_strings=query_strings, autocomplete=autocomplete_option, highlighting=highlighting_option, offset=offset_option, @@ -173,7 +175,7 @@ def _parse_lookup_string_arguments(self) -> list[str]: lookup_strings.extend(search_string_collection) return lookup_strings - def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str]]: + def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str, tuple[str, ...]]]: r"""Performs input sanitization on the lookup query terms. Sanitization Operations: @@ -202,6 +204,7 @@ def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str]]: """ sanitized_lookup_strings = [] for lookup_string in lookup_strings: + raw_lookup_string = lookup_string lookup_string = lookup_string.strip().lower() windows_smart_single_quote_pattern = r"[‘’]" @@ -239,7 +242,11 @@ def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str]]: fully_escaped_lookup_string = fully_escaped_lookup_string.replace("&&", " ") fully_escaped_lookup_string = fully_escaped_lookup_string.replace("||", " ") - sanitized_lookup_strings.append(set([lookup_string_with_escaped_groups, fully_escaped_lookup_string])) + query_strings = [lookup_string_with_escaped_groups] + if fully_escaped_lookup_string != lookup_string_with_escaped_groups: + query_strings.append(fully_escaped_lookup_string) + + sanitized_lookup_strings.append((raw_lookup_string, tuple(query_strings))) return sanitized_lookup_strings @@ -358,22 +365,21 @@ class NameResolutionBulkLookupHandler(BaseNameResolutionLookupHandler): name = "bulk-lookup" - async def post(self) -> dict[str, list[dict]]: + async def post(self) -> None: """Returns cliques with a name or synonym that contains a specified string sent via batch.""" try: lookup_results = {} for lookup_query in self.lookup_queries: lookup_result: list[dict] = await lookup(self.biothings, lookup_query, self.filters) - lookup_key: str = lookup_query.string.pop() - lookup_results[lookup_key] = lookup_result + lookup_results[lookup_query.raw_string] = lookup_result except Exception as gen_exc: raise HTTPError(detail="Error occurred during processing.", status_code=500) from gen_exc self.finish(lookup_results) async def lookup( - biothings_metadata: NameResolutionAPINamespace, lookup_query: list[LookupQuery], filters: dict + biothings_metadata: NameResolutionAPINamespace, lookup_query: LookupQuery, filters: dict ) -> list[dict]: """Returns cliques with a name or synonym that contains a specified string.""" elasticsearch_query = _build_elasticsearch_query(lookup_query, filters) @@ -440,11 +446,11 @@ async def lookup( return outputs -def _build_elasticsearch_query(lookup_query: list[LookupQuery], filters: dict) -> dict: +def _build_elasticsearch_query(lookup_query: LookupQuery, filters: dict) -> dict: queries = [] # Base Query - for lookup_string in lookup_query.string: + for lookup_string in lookup_query.query_strings: queries.append( { "multi_match": { @@ -457,7 +463,7 @@ def _build_elasticsearch_query(lookup_query: list[LookupQuery], filters: dict) - # https://www.elastic.co/search-labs/blog/elasticsearch-autocomplete-search#2.-query-time if lookup_query.autocomplete: - for lookup_string in lookup_query.string: + for lookup_string in lookup_query.query_strings: queries.append( { "multi_match": { diff --git a/test/test_api_contract.py b/test/test_api_contract.py index b5471b9..d895b24 100644 --- a/test/test_api_contract.py +++ b/test/test_api_contract.py @@ -157,12 +157,12 @@ def test_bulk_lookup_post_returns_results_by_input_string(nameres_server): nameres_server, "/bulk-lookup?limit=1", method="POST", - body={"strings": ["aspirin", "diabetes"]}, + body={"strings": ["Aspirin", "diabetes"]}, ) assert status == 200 - assert set(body) == {"aspirin", "diabetes"} - assert isinstance(body["aspirin"], list) + assert set(body) == {"Aspirin", "diabetes"} + assert isinstance(body["Aspirin"], list) assert isinstance(body["diabetes"], list) From 54940a47f4ed713a4e4e9341b44b6f9de1136751 Mon Sep 17 00:00:00 2001 From: shuchenliu Date: Fri, 8 May 2026 14:52:07 -0700 Subject: [PATCH 3/3] Strip bulk lookup response keys --- src/nameres/handlers/lookup.py | 4 ++-- test/test_api_contract.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nameres/handlers/lookup.py b/src/nameres/handlers/lookup.py index ba2ec61..8e940ee 100644 --- a/src/nameres/handlers/lookup.py +++ b/src/nameres/handlers/lookup.py @@ -204,8 +204,8 @@ def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str, t """ sanitized_lookup_strings = [] for lookup_string in lookup_strings: - raw_lookup_string = lookup_string - lookup_string = lookup_string.strip().lower() + raw_lookup_string = lookup_string.strip() + lookup_string = raw_lookup_string.lower() windows_smart_single_quote_pattern = r"[‘’]" windows_smart_double_quote_pattern = r"[“”]" diff --git a/test/test_api_contract.py b/test/test_api_contract.py index d895b24..28ed103 100644 --- a/test/test_api_contract.py +++ b/test/test_api_contract.py @@ -157,7 +157,7 @@ def test_bulk_lookup_post_returns_results_by_input_string(nameres_server): nameres_server, "/bulk-lookup?limit=1", method="POST", - body={"strings": ["Aspirin", "diabetes"]}, + body={"strings": [" Aspirin ", "diabetes"]}, ) assert status == 200