Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/nameres/handlers/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Shared handler behavior for NameResolution API endpoints."""

import json

from biothings.web.handlers import BaseHandler


Expand All @@ -25,3 +27,7 @@ def set_default_headers(self):

def options(self, *args, **kwargs):
self.finish()

def finish_json(self, response: dict | list) -> None:
self.set_header("Content-Type", "application/json; charset=UTF-8")
super().finish(json.dumps(response))
84 changes: 44 additions & 40 deletions src/nameres/handlers/lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
Converted from SOLR -> Elasticsearch
"""

import collections
import dataclasses
import logging
import json
import logging
import re
from typing import Optional

Expand All @@ -27,7 +26,8 @@ class LookupArgumentException(Exception):

@dataclasses.dataclass()
class LookupQuery:
string: str
raw_string: str
query_strings: tuple[str, ...]
autocomplete: Optional[bool]
highlighting: Optional[bool]
offset: Optional[int]
Expand Down Expand Up @@ -146,9 +146,10 @@ def parse_boolean(argument: str | bool) -> bool:
raise LookupArgumentException(lookup_message)

self.lookup_queries = []
for search_string in sanitized_lookup_strings:
for raw_string, query_strings in sanitized_lookup_strings:
lookup_query = LookupQuery(
string=search_string,
raw_string=raw_string,
query_strings=query_strings,
autocomplete=autocomplete_option,
highlighting=highlighting_option,
offset=offset_option,
Expand All @@ -174,7 +175,7 @@ def _parse_lookup_string_arguments(self) -> list[str]:
lookup_strings.extend(search_string_collection)
return lookup_strings

def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str]]:
def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str, tuple[str, ...]]]:
r"""Performs input sanitization on the lookup query terms.

Sanitization Operations:
Expand Down Expand Up @@ -203,7 +204,8 @@ def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str]]:
"""
sanitized_lookup_strings = []
for lookup_string in lookup_strings:
lookup_string = lookup_string.strip().lower()
raw_lookup_string = lookup_string.strip()
lookup_string = raw_lookup_string.lower()

windows_smart_single_quote_pattern = r"[‘’]"
windows_smart_double_quote_pattern = r"[“”]"
Expand Down Expand Up @@ -240,7 +242,11 @@ def _sanitize_lookup_query(self, lookup_strings: list[str]) -> list[tuple[str]]:
fully_escaped_lookup_string = fully_escaped_lookup_string.replace("&&", " ")
fully_escaped_lookup_string = fully_escaped_lookup_string.replace("||", " ")

sanitized_lookup_strings.append(set([lookup_string_with_escaped_groups, fully_escaped_lookup_string]))
query_strings = [lookup_string_with_escaped_groups]
if fully_escaped_lookup_string != lookup_string_with_escaped_groups:
query_strings.append(fully_escaped_lookup_string)

sanitized_lookup_strings.append((raw_lookup_string, tuple(query_strings)))

return sanitized_lookup_strings

Expand Down Expand Up @@ -338,15 +344,15 @@ async def get(self):
lookup_result = await lookup(self.biothings, self.lookup_queries[0], self.filters)
except Exception as gen_exc:
raise HTTPError(detail="Error occurred during processing.", status_code=500) from gen_exc
self.finish(lookup_result)
self.finish_json(lookup_result)

async def post(self):
"""Returns cliques with a name or synonym that contains a specified string."""
try:
lookup_result = await lookup(self.biothings, self.lookup_queries[0], self.filters)
except Exception as gen_exc:
raise HTTPError(detail="Error occurred during processing.", status_code=500) from gen_exc
self.finish(lookup_result)
self.finish_json(lookup_result)


class NameResolutionBulkLookupHandler(BaseNameResolutionLookupHandler):
Expand All @@ -359,23 +365,22 @@ class NameResolutionBulkLookupHandler(BaseNameResolutionLookupHandler):

name = "bulk-lookup"

async def post(self) -> dict[str, collections.OrderedDict]:
async def post(self) -> None:
"""Returns cliques with a name or synonym that contains a specified string sent via batch."""

try:
lookup_results = {}
for lookup_query in self.lookup_queries:
lookup_result: collections.OrderedDict = await lookup(self.biothings, lookup_query, self.filters)
lookup_key: str = lookup_query.string.pop()
lookup_results[lookup_key] = lookup_result
lookup_result: list[dict] = await lookup(self.biothings, lookup_query, self.filters)
lookup_results[lookup_query.raw_string] = lookup_result
except Exception as gen_exc:
raise HTTPError(detail="Error occurred during processing.", status_code=500) from gen_exc
self.finish(lookup_results)


async def lookup(
biothings_metadata: NameResolutionAPINamespace, lookup_query: list[LookupQuery], filters: dict
) -> collections.OrderedDict:
biothings_metadata: NameResolutionAPINamespace, lookup_query: LookupQuery, filters: dict
) -> list[dict]:
Comment thread
shuchenliu marked this conversation as resolved.
"""Returns cliques with a name or synonym that contains a specified string."""
elasticsearch_query = _build_elasticsearch_query(lookup_query, filters)

Expand Down Expand Up @@ -404,10 +409,7 @@ async def lookup(
}
lookup_response = await biothings_metadata.elasticsearch.async_client.search(**search_parameters)

# https://www.tornadoweb.org/en/stable/web.html#tornado.web.RequestHandler.write
# We have to change the API slightly here due to security requirements around returning
# list-objects as the API response
outputs = collections.OrderedDict()
outputs = []
for doc in lookup_response["hits"]["hits"]:
preferred_matches = []
synonym_matches = []
Expand All @@ -419,34 +421,36 @@ async def lookup(

source = doc["_source"]
curie_identifier = source.get("curie", "")
outputs[curie_identifier] = dataclasses.asdict(
LookupResult(
curie=curie_identifier,
label=source.get("preferred_name", ""),
highlighting=(
{
"labels": preferred_matches,
"synonyms": synonym_matches,
}
if lookup_query.highlighting
else {}
),
synonyms=source.get("names", []),
score=doc.get("_score", ""),
taxa=source.get("taxa", []),
clique_identifier_count=source.get("clique_identifier_count", 0),
types=[f"biolink:{d}" for d in source.get("biolink_types", [])],
outputs.append(
dataclasses.asdict(
LookupResult(
curie=curie_identifier,
label=source.get("preferred_name", ""),
highlighting=(
{
"labels": preferred_matches,
"synonyms": synonym_matches,
}
if lookup_query.highlighting
else {}
),
synonyms=source.get("names", []),
score=doc.get("_score", ""),
taxa=source.get("taxa", []),
clique_identifier_count=source.get("clique_identifier_count", 0),
types=[f"biolink:{d}" for d in source.get("biolink_types", [])],
)
)
)

return outputs


def _build_elasticsearch_query(lookup_query: list[LookupQuery], filters: dict) -> dict:
def _build_elasticsearch_query(lookup_query: LookupQuery, filters: dict) -> dict:
queries = []

# Base Query
for lookup_string in lookup_query.string:
for lookup_string in lookup_query.query_strings:
queries.append(
{
"multi_match": {
Expand All @@ -459,7 +463,7 @@ def _build_elasticsearch_query(lookup_query: list[LookupQuery], filters: dict) -

# https://www.elastic.co/search-labs/blog/elasticsearch-autocomplete-search#2.-query-time
if lookup_query.autocomplete:
for lookup_string in lookup_query.string:
for lookup_string in lookup_query.query_strings:
queries.append(
{
"multi_match": {
Expand Down
16 changes: 8 additions & 8 deletions test/test_api_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ def test_lookup_get_returns_result_shape(nameres_server):
status, _, body = _request_json(nameres_server, "/lookup?string=aspirin&limit=1")

assert status == 200
assert isinstance(body, dict)
assert isinstance(body, list)
assert body

first_result = next(iter(body.values()))
first_result = body[0]
assert first_result["curie"]
assert first_result["label"]
assert isinstance(first_result["synonyms"], list)
Expand All @@ -149,27 +149,27 @@ def test_lookup_accepts_issue_8_query_shape(nameres_server):
status, _, body = _request_json(nameres_server, f"/lookup?{query}")

assert status == 200
assert isinstance(body, dict)
assert isinstance(body, list)


def test_bulk_lookup_post_returns_results_by_input_string(nameres_server):
status, _, body = _request_json(
nameres_server,
"/bulk-lookup?limit=1",
method="POST",
body={"strings": ["aspirin", "diabetes"]},
body={"strings": [" Aspirin ", "diabetes"]},
)

assert status == 200
assert set(body) == {"aspirin", "diabetes"}
assert isinstance(body["aspirin"], dict)
assert isinstance(body["diabetes"], dict)
assert set(body) == {"Aspirin", "diabetes"}
assert isinstance(body["Aspirin"], list)
assert isinstance(body["diabetes"], list)


def test_synonyms_post_returns_known_and_missing_curies(nameres_server):
lookup_status, _, lookup_body = _request_json(nameres_server, "/lookup?string=aspirin&limit=1")
assert lookup_status == 200
known_curie = next(iter(lookup_body))
known_curie = lookup_body[0]["curie"]

status, _, body = _request_json(
nameres_server,
Expand Down