Source code for hpotk.ontology.load.obographs._factory

import abc
import logging
import re
import typing

from hpotk.model import (
    TermId,
    Term,
    MinimalTerm,
    Synonym,
    SynonymType,
    SynonymCategory,
    Definition,
)
from ._model import Node, Meta, SynonymPropertyValue

logger = logging.getLogger(__name__)

MINIMAL_TERM = typing.TypeVar("MINIMAL_TERM", bound=MinimalTerm)

OBO_PURL_PT = re.compile(r"^http://purl\.obolibrary\.org/obo/(?P<value>.+)$")
HP_VAL_PT = re.compile(r"^hp(.*)#(?P<value>.+)$")
ORCID_PT = re.compile(r".*orcid\.org/(?P<orcid>\d{4}-\d{4}-\d{4}-\d{4})$")


def create_alt_term_ids(node: Node) -> typing.List[TermId]:
    """
    Collect the alternative term IDs stored in the metadata of the `node`.

    A basic property value contributes an alternative ID only if both its predicate and its value
    are set and the predicate ends with `#hasAlternativeId`. The value is parsed as a CURIE.

    :param node: the obographs node to inspect.
    :return: a list with the alternative term IDs. The list is empty if the node has no metadata
      or if no property value qualifies.
    """
    if not node.meta:
        return []

    return [
        TermId.from_curie(prop.val)
        for prop in node.meta.basic_property_values
        if prop.pred is not None and prop.val is not None and prop.pred.endswith("#hasAlternativeId")
    ]


def create_synonyms(meta: Meta) -> typing.Optional[typing.List[Synonym]]:
    """
    Convert all synonym property values of the `meta` into `Synonym` instances.

    :param meta: the node metadata with the synonyms to convert.
    :return: a list with one `Synonym` per synonym property value, or `None` if `meta` has no synonyms.
    """
    raw_synonyms = meta.synonyms
    if len(raw_synonyms) < 1:
        return None
    return list(map(parse_synonym, raw_synonyms))


def parse_synonym(spv: SynonymPropertyValue) -> Synonym:
    """
    Turn an obographs synonym property value into a `Synonym`.

    The predicate is mapped to a synonym category and the synonym type is decoded,
    both of which may end up as `None` if they are not recognized.
    Cross-references that cannot be parsed are left out.

    :param spv: the synonym property value to convert.
    :return: the synonym. Its `xrefs` are `None` if `spv` has no xrefs or if none of them could be parsed.
    """
    category: typing.Optional[SynonymCategory] = parse_synonym_category(spv.pred)
    kind: typing.Optional[SynonymType] = parse_synonym_type(spv.synonym_type)

    xrefs = None
    if len(spv.xrefs) != 0:
        # Keep only the cross-references that were successfully parsed.
        resolved = [term_id for term_id in map(parse_synonym_xref, spv.xrefs) if term_id is not None]
        if len(resolved) != 0:
            xrefs = resolved

    return Synonym(
        name=spv.val,
        synonym_category=category,
        synonym_type=kind,
        xrefs=xrefs,
    )


def parse_synonym_category(synonym_category: str) -> typing.Optional[SynonymCategory]:
    """
    Map a synonym predicate (e.g. `hasExactSynonym`) to the corresponding `SynonymCategory`.

    :param synonym_category: the predicate of the synonym property value.
    :return: the synonym category or `None` if the predicate is not recognized.
      An unrecognized predicate is reported at the debug log level.
    """
    known_categories = {
        "hasRelatedSynonym": SynonymCategory.RELATED,
        "hasExactSynonym": SynonymCategory.EXACT,
        "hasBroadSynonym": SynonymCategory.BROAD,
        "hasNarrowSynonym": SynonymCategory.NARROW,
    }
    category = known_categories.get(synonym_category)
    if category is None:
        logger.debug(f"Unknown synonym category {synonym_category}")
    return category


def parse_synonym_type(synonym_type: str) -> typing.Optional[SynonymType]:
    """
    Decode the synonym type IRI into a `SynonymType`.

    Only OBO PURLs (values matching `OBO_PURL_PT`) are considered. If the part that follows the PURL prefix
    matches `HP_VAL_PT`, the fragment after `#` is looked up among the known HPO synonym types.
    Otherwise, the part is compared with the identifiers of the allelic requirement synonym type.

    :param synonym_type: the synonym type IRI, may be `None` or empty.
    :return: the synonym type, or `None` if `synonym_type` is missing, empty, or not recognized.
      An unrecognized non-empty value is reported at the debug log level.
    """
    if synonym_type is None or not len(synonym_type):
        return None

    purl_match = OBO_PURL_PT.match(synonym_type)
    if purl_match is not None:
        local_part = purl_match.group("value")
        hp_match = HP_VAL_PT.match(local_part)
        if hp_match is None:
            if local_part in ("HP_0034334", "allelic_requirement"):
                return SynonymType.ALLELIC_REQUIREMENT
        else:
            hp_synonym_types = {
                "layperson": SynonymType.LAYPERSON_TERM,
                "layperson term": SynonymType.LAYPERSON_TERM,
                "abbreviation": SynonymType.ABBREVIATION,
                "uk_spelling": SynonymType.UK_SPELLING,
                "obsolete_synonym": SynonymType.OBSOLETE_SYNONYM,
                "plural_form": SynonymType.PLURAL_FORM,
            }
            recognized = hp_synonym_types.get(hp_match.group("value"))
            if recognized is not None:
                return recognized

    logger.debug(f"Unknown synonym type {synonym_type}")
    return None


def parse_synonym_xref(xref) -> typing.Optional[TermId]:
    """
    Parse a synonym cross-reference into a `TermId`.

    A value that ends with an `orcid.org/<id>` segment (see `ORCID_PT`) is turned into an `ORCID:<id>` term ID.
    Any other value is parsed as a CURIE.

    :param xref: the cross-reference value.
    :return: the term ID or `None` if the value is neither an ORCID reference nor a valid CURIE.
      The failure is reported at the debug log level.
    """
    orcid_match = ORCID_PT.match(xref)
    if orcid_match is not None:
        orcid = orcid_match.group("orcid")
        return TermId.from_curie(f"ORCID:{orcid}")

    try:
        # TODO: this can contain many things. Investigate..
        term_id = TermId.from_curie(xref)
    except ValueError:
        logger.debug(f"Unable to create a synonym xref from {xref}")
        return None
    return term_id


def create_xrefs(meta: Meta) -> typing.Optional[typing.List[TermId]]:
    """
    Convert the cross-references of the `meta` into term IDs.

    Each cross-reference value is parsed as a CURIE. A value that cannot be parsed is skipped
    and reported at the debug log level instead of aborting the conversion, the same way
    unparsable synonym cross-references are handled.

    :param meta: the node metadata with the cross-references to convert.
    :return: a list with the parsed term IDs, or `None` if `meta` has no cross-references
      or if none of them could be parsed.
    """
    if len(meta.xrefs) == 0:
        return None

    term_ids = []
    for xref in meta.xrefs:
        try:
            term_ids.append(TermId.from_curie(xref.val))
        except ValueError:
            # Not every xref is a CURIE. One malformed value should not prevent creating the term.
            logger.debug("Unable to create an xref from %s", xref.val)
    return term_ids if term_ids else None


class ObographsTermFactory(typing.Generic[MINIMAL_TERM], metaclass=abc.ABCMeta):
    """
    Base class for factories that build an ontology term from a `TermId` and an obographs `Node`.

    The type of the created term is given by the `MINIMAL_TERM` type parameter.
    """

    @abc.abstractmethod
    def create_term(self, term_id: TermId, node: Node) -> typing.Optional[MINIMAL_TERM]:
        """
        Build a `MinimalTerm`, or a more specific term, from the `term_id` and the `node`.

        The factory is free to decline creating the term. `None` is returned in that case.
        """


class MinimalTermFactory(ObographsTermFactory[MinimalTerm]):
    """
    A factory that creates a `MinimalTerm` from the ID, label, alternative IDs, and obsoletion status of a node.
    """

    def create_term(self, term_id: TermId, node: Node) -> typing.Optional[MinimalTerm]:
        """
        Create a `MinimalTerm` for the `term_id` and the `node`.

        :param term_id: the ID of the term.
        :param node: the obographs node; its label becomes the name of the term.
        :return: the minimal term.
        """
        # A node without metadata cannot be marked as deprecated, hence it is not obsolete.
        is_obsolete = node.meta is not None and node.meta.is_deprecated
        alt_term_ids = create_alt_term_ids(node)
        return MinimalTerm.create_minimal_term(term_id, node.lbl, alt_term_ids, is_obsolete)


class TermFactory(ObographsTermFactory[Term]):
    """
    A factory that creates a `Term`, including the definition, comment, synonyms, and cross-references
    found in the metadata of a node.
    """

    def create_term(self, term_id: TermId, node: Node) -> typing.Optional[Term]:
        """
        Create a `Term` for the `term_id` and the `node`.

        :param term_id: the ID of the term.
        :param node: the obographs node; its label becomes the name of the term.
        :return: the term. If the node has no metadata, the term has no alternative IDs, definition,
          comment, synonyms, or cross-references, and it is not obsolete.
        """
        meta = node.meta
        if not meta:
            # Without metadata, only the ID and the label are available.
            return Term.create_term(
                term_id,
                name=node.lbl,
                alt_term_ids=[],
                is_obsolete=False,
                definition=None,
                comment=None,
                synonyms=None,
                xrefs=None,
            )

        if meta.definition is not None:
            definition_text = meta.definition.val
            definition_xrefs = meta.definition.xrefs
            definition = Definition(definition_text, definition_xrefs)
        else:
            definition = None

        # Several comments are merged into a single comma-separated string.
        comment = ", ".join(meta.comments) if len(meta.comments) > 0 else None
        alt_term_ids = create_alt_term_ids(node)
        synonyms = create_synonyms(meta)
        xrefs = create_xrefs(meta)

        return Term.create_term(
            term_id,
            name=node.lbl,
            alt_term_ids=alt_term_ids,
            is_obsolete=meta.is_deprecated,
            definition=definition,
            comment=comment,
            synonyms=synonyms,
            xrefs=xrefs,
        )