import abc
import logging
import re
import typing
from hpotk.model import (
TermId,
Term,
MinimalTerm,
Synonym,
SynonymType,
SynonymCategory,
Definition,
)
from ._model import Node, Meta, SynonymPropertyValue
logger = logging.getLogger(__name__)
MINIMAL_TERM = typing.TypeVar("MINIMAL_TERM", bound=MinimalTerm)
OBO_PURL_PT = re.compile(r"^http://purl\.obolibrary\.org/obo/(?P<value>.+)$")
HP_VAL_PT = re.compile(r"^hp(.*)#(?P<value>.+)$")
ORCID_PT = re.compile(r".*orcid\.org/(?P<orcid>\d{4}-\d{4}-\d{4}-\d{4})$")
def create_alt_term_ids(node: Node) -> typing.List[TermId]:
alt_term_ids = []
if node.meta:
for bpv in node.meta.basic_property_values:
if bpv.pred is not None and bpv.val is not None and bpv.pred.endswith("#hasAlternativeId"):
alt_term_ids.append(TermId.from_curie(bpv.val))
return alt_term_ids
def create_synonyms(meta: Meta) -> typing.Optional[typing.List[Synonym]]:
if len(meta.synonyms) == 0:
return None
else:
return [parse_synonym(s) for s in meta.synonyms]
def parse_synonym(spv: SynonymPropertyValue) -> Synonym:
synonym_category: typing.Optional[SynonymCategory] = parse_synonym_category(spv.pred)
synonym_type: typing.Optional[SynonymType] = parse_synonym_type(spv.synonym_type)
if len(spv.xrefs) != 0:
xrefs = []
for xref in spv.xrefs:
parsed = parse_synonym_xref(xref)
if parsed is not None:
xrefs.append(parsed)
xrefs = list(xrefs) if len(xrefs) != 0 else None # shrink to fit
else:
xrefs = None
return Synonym(
name=spv.val,
synonym_category=synonym_category,
synonym_type=synonym_type,
xrefs=xrefs,
)
def parse_synonym_category(synonym_category: str) -> typing.Optional[SynonymCategory]:
if synonym_category == "hasRelatedSynonym":
return SynonymCategory.RELATED
elif synonym_category == "hasExactSynonym":
return SynonymCategory.EXACT
elif synonym_category == "hasBroadSynonym":
return SynonymCategory.BROAD
elif synonym_category == "hasNarrowSynonym":
return SynonymCategory.NARROW
else:
logger.debug(f"Unknown synonym category {synonym_category}")
return None
def parse_synonym_type(synonym_type: str) -> typing.Optional[SynonymType]:
if synonym_type is None or len(synonym_type) == 0:
return None
hp_obo_matcher = OBO_PURL_PT.match(synonym_type)
if hp_obo_matcher:
value = hp_obo_matcher.group("value")
hp_matcher = HP_VAL_PT.match(value)
if hp_matcher:
value = hp_matcher.group("value")
if value in ("layperson", "layperson term"):
return SynonymType.LAYPERSON_TERM
elif value == "abbreviation":
return SynonymType.ABBREVIATION
elif value == "uk_spelling":
return SynonymType.UK_SPELLING
elif value == "obsolete_synonym":
return SynonymType.OBSOLETE_SYNONYM
elif value == "plural_form":
return SynonymType.PLURAL_FORM
else:
if value in ("HP_0034334", "allelic_requirement"):
return SynonymType.ALLELIC_REQUIREMENT
logger.debug(f"Unknown synonym type {synonym_type}")
return None
def parse_synonym_xref(xref) -> typing.Optional[TermId]:
orcid_matcher = ORCID_PT.match(xref)
if orcid_matcher:
return TermId.from_curie(f"ORCID:{orcid_matcher.group('orcid')}")
else:
try:
# TODO: this can contain many things. Investigate..
return TermId.from_curie(xref)
except ValueError:
logger.debug(f"Unable to create a synonym xref from {xref}")
return None
def create_xrefs(meta: Meta) -> typing.Optional[typing.List[TermId]]:
if len(meta.xrefs) == 0:
return None
else:
# TODO: Expecting that all xrefs are CURIES may be a bit too naive. Investigate..
return [TermId.from_curie(xref.val) for xref in meta.xrefs]
class ObographsTermFactory(typing.Generic[MINIMAL_TERM], metaclass=abc.ABCMeta):
"""
Term factory turns `TermId` and obographs `Node` into an ontology term.
"""
@abc.abstractmethod
def create_term(self, term_id: TermId, node: Node) -> typing.Optional[MINIMAL_TERM]:
"""
Create `MinimalTerm` or a more specific instance for `TermId` and `Node`
The term may not be created at the discretion of the factory, in which case `None` is returned.
"""
pass
[docs]
class MinimalTermFactory(ObographsTermFactory[MinimalTerm]):
[docs]
def create_term(self, term_id: TermId, node: Node) -> typing.Optional[MinimalTerm]:
is_obsolete = node.meta is not None and node.meta.is_deprecated
alt_term_ids = create_alt_term_ids(node)
return MinimalTerm.create_minimal_term(term_id, node.lbl, alt_term_ids, is_obsolete)
[docs]
class TermFactory(ObographsTermFactory[Term]):
[docs]
def create_term(self, term_id: TermId, node: Node) -> typing.Optional[Term]:
if node.meta:
if node.meta.definition is not None:
d = node.meta.definition.val
xrefs = node.meta.definition.xrefs
definition = Definition(d, xrefs)
else:
definition = None
comment = ", ".join(node.meta.comments) if len(node.meta.comments) > 0 else None
alt_term_ids = create_alt_term_ids(node)
synonyms = create_synonyms(node.meta)
xrefs = create_xrefs(node.meta)
return Term.create_term(
term_id,
name=node.lbl,
alt_term_ids=alt_term_ids,
is_obsolete=node.meta.is_deprecated,
definition=definition,
comment=comment,
synonyms=synonyms,
xrefs=xrefs,
)
else:
return Term.create_term(
term_id,
name=node.lbl,
alt_term_ids=[],
is_obsolete=False,
definition=None,
comment=None,
synonyms=None,
xrefs=None,
)