Source code for hpotk.model._term_id

import abc


class TermId(metaclass=abc.ABCMeta):
    """
    `TermId` is an identifier of an ontology concept.

    `TermId` consists of a *prefix* and *id* that are separated by a delimiter:

    .. doctest:: term-id

      >>> term_id = TermId.from_curie('HP:0001250')
      >>> assert term_id.prefix == 'HP'
      >>> assert term_id.id == '0001250'

    The `TermId` has a natural ordering which compares two IDs first based on prefix and then value.
    Both comparisons are lexicographic. All four ordering operators (``<``, ``<=``, ``>``, ``>=``)
    are supported between `TermId` instances.
    """

    @staticmethod
    def from_curie(curie: str) -> "TermId":
        """
        Create a `TermId` from a compact URI (CURIE).

        The prefix and id of a `TermId` must be separated either by a colon ``:`` or an underscore ``_``.

        .. doctest:: term-id

          >>> term_id = TermId.from_curie('HP:0001250')
          >>> term_id.value
          'HP:0001250'

        The parsing will forget the original delimiter. The `value` always joins the *prefix* and *id* with ``:``.

        .. doctest:: term-id

          >>> ncit = TermId.from_curie('NCIT_C3117')
          >>> ncit.value
          'NCIT:C3117'

        The ``:`` has higher priority than ``_``, and it will be used as delimiter.

        .. doctest:: term-id

          >>> snomed = TermId.from_curie('SNOMEDCT_US:128613002')
          >>> snomed.prefix
          'SNOMEDCT_US'
          >>> snomed.id
          '128613002'

        :param curie: a CURIE `str` to be parsed.
        :return: the created `TermId`.
        :raises: `ValueError` if the value is `None` or if it contains neither ``:`` nor ``_``.
        """
        if curie is None:
            raise ValueError('Curie must not be None')

        # The colon wins over the underscore, so look for it first and only
        # fall back to the underscore when no colon is present.
        idx = curie.find(':')
        if idx < 0:
            idx = curie.find('_')
        if idx < 0:
            raise ValueError(f'The CURIE {curie} has no colon `:` or underscore `_`')

        return DefaultTermId(idx=idx, value=curie)

    @property
    @abc.abstractmethod
    def prefix(self) -> str:
        """
        Get `prefix` of the ontology concept.

        .. doctest:: term-id

          >>> term_id = TermId.from_curie('HP:1234567')
          >>> term_id.prefix
          'HP'
        """
        pass

    @property
    @abc.abstractmethod
    def id(self) -> str:
        """
        Get `id` of the ontology concept.

        .. doctest:: term-id

          >>> term_id = TermId.from_curie('HP:1234567')
          >>> term_id.id
          '1234567'
        """
        pass

    @property
    def value(self) -> str:
        """
        Get concept value consisting of `self.prefix` and `self.id` joined with ``:``.

        .. doctest:: term-id

          >>> term_id = TermId.from_curie('HP:1234567')
          >>> term_id.value
          'HP:1234567'
        """
        return self.prefix + ':' + self.id

    @staticmethod
    def _calculate_hash(prefix: str, id: str) -> int:
        """
        Calculate hash of a term ID from the members.

        :param prefix: the prefix part of the term ID (e.g. `HP` for `HP:1234567`) as a `str`.
        :param id: the id part of the term ID (e.g. `1234567` for `HP:1234567`) as a `str`.
        :return: a hash as an `int`.
        """
        return hash((prefix, id))

    def __hash__(self) -> int:
        return self._calculate_hash(self.prefix, self.id)

    def __eq__(self, other):
        # Two term IDs are equal iff both the prefix and the id match.
        # `NotImplemented` lets Python try the reflected comparison for foreign
        # types; the `==` operator still evaluates to `False` for unrelated objects.
        if not isinstance(other, TermId):
            return NotImplemented
        return self.prefix == other.prefix and self.id == other.id

    # Ordering: tuple comparison is lexicographic, hence the prefix is compared
    # first and the id is only consulted when the prefixes are equal.

    def __lt__(self, other):
        if not isinstance(other, TermId):
            return NotImplemented
        return (self.prefix, self.id) < (other.prefix, other.id)

    def __le__(self, other):
        if not isinstance(other, TermId):
            return NotImplemented
        return (self.prefix, self.id) <= (other.prefix, other.id)

    def __gt__(self, other):
        if not isinstance(other, TermId):
            return NotImplemented
        return (self.prefix, self.id) > (other.prefix, other.id)

    def __ge__(self, other):
        if not isinstance(other, TermId):
            return NotImplemented
        return (self.prefix, self.id) >= (other.prefix, other.id)

    def __str__(self):
        return self.value
class DefaultTermId(TermId):
    """
    The default :class:`TermId` implementation.

    It keeps the whole CURIE string together with the position of the delimiter,
    and computes the hash once in the initializer so that `__hash__` is a plain
    attribute lookup.
    """

    def __init__(self, value: str, idx: int):
        self._value = value
        self._idx = idx
        # Split once up-front; the hash is derived from the same (prefix, id)
        # pair that the properties below expose.
        prefix_part = value[:idx]
        id_part = value[idx + 1:]
        self._hash = self._calculate_hash(prefix=prefix_part, id=id_part)

    @property
    def prefix(self) -> str:
        # Everything before the delimiter.
        delimiter_at = self._idx
        return self._value[:delimiter_at]

    @property
    def id(self) -> str:
        # Everything after the delimiter (the delimiter itself is skipped).
        first_id_char = self._idx + 1
        return self._value[first_id_char:]

    def __repr__(self):
        return f'DefaultTermId(idx={self._idx}, value={self._value})'

    def __hash__(self) -> int:
        # Return the value cached in `__init__`.
        return self._hash


# TODO - make specific HPO TermId

class SimpleTermId(TermId):
    """
    A minimal :class:`TermId` implementation.

    It stores only the complete CURIE and the index of the delimiter between
    the prefix and the id. Nothing is cached; hashing and equality are inherited
    from `TermId`.
    """

    def __init__(self, value: str, idx: int):
        self._value = value
        self._idx = idx

    @property
    def prefix(self) -> str:
        # Everything before the delimiter.
        delimiter_at = self._idx
        return self._value[:delimiter_at]

    @property
    def id(self) -> str:
        # Everything after the delimiter (the delimiter itself is skipped).
        first_id_char = self._idx + 1
        return self._value[first_id_char:]

    def __repr__(self):
        return f'SimpleTermId(idx={self._idx}, value={self._value})'