import abc
import enum
import io
import logging
import os
import shutil
import typing
from hpotk.ontology import MinimalOntology, Ontology
from hpotk.ontology.load.obographs import load_minimal_ontology, load_ontology
from hpotk.util import validate_instance
[docs]
class OntologyType(enum.Enum):
    """
    The ontologies that :class:`OntologyStore` can work with.

    Each member is declared as a ``(value, identifier)`` pair. The first item becomes the enum value
    and the second item is exposed via :attr:`identifier`.
    """

    HPO = 'HPO', 'HP'
    """
    Human Phenotype Ontology.
    """

    MAxO = 'MAxO', 'MAXO'
    """
    Medical Action Ontology.
    """

    MONDO = 'MONDO', 'MONDO'
    """
    Mondo Disease Ontology.
    """

    def __new__(cls, *args, **kwargs):
        # Only the first item of the declaration tuple serves as the enum value,
        # the remaining items are consumed by `__init__`.
        member = object.__new__(cls)
        member._value_ = args[0]
        return member

    def __init__(self, _: str, identifier: str):
        # The first positional argument (the enum value) was already handled in `__new__`.
        self._id_ = identifier

    @property
    def identifier(self) -> str:
        """
        Get a `str` with the ontology identifier (e.g. ``HP`` for HPO).

        >>> from hpotk.store import OntologyType
        >>> OntologyType.HPO.identifier
        'HP'
        >>> OntologyType.MAxO.identifier
        'MAXO'
        """
        return self._id_
[docs]
class RemoteOntologyService(metaclass=abc.ABCMeta):
    """
    `RemoteOntologyService` knows how to open a binary stream (:class:`io.BufferedIOBase`)
    for reading the content of an `ontology_type` of a particular `release`.
    """

    @abc.abstractmethod
    def fetch_ontology(
        self,
        ontology_type: OntologyType,
        release: str,
    ) -> io.BufferedIOBase:
        """
        Open a connection for reading bytes of the `ontology_type` from a remote resource.

        :param ontology_type: the desired ontology kind, e.g. :class:`OntologyType.HPO`.
        :param release: a `str` with the desired ontology release.
        :return: a binary IO for reading the ontology data.
        """
        ...
[docs]
class OntologyReleaseService(metaclass=abc.ABCMeta):
    """
    `OntologyReleaseService` knows how to fetch ontology release tags, such as `v2023-10-09` for HPO.

    :class:`OntologyStore` calls ``fetch_tags(ontology_type)`` on the service and takes the maximum
    of the returned tags as the latest release.
    """
[docs]
class OntologyStore:
    """
    `OntologyStore` stores versions of the supported ontologies.

    The ontology files are kept in per-ontology sub-folders of the store directory
    (e.g. ``<store_dir>/HP/hp.v2023-10-09.json``). A file that is missing from the store
    is downloaded via the remote ontology service on the first load.
    """

    def __init__(
        self,
        store_dir: str,
        ontology_release_service: OntologyReleaseService,
        remote_ontology_service: RemoteOntologyService,
    ):
        """
        :param store_dir: a `str` with path to the directory where the ontology files are stored.
        :param ontology_release_service: the service for listing the release tags of an ontology.
        :param remote_ontology_service: the service for fetching the ontology data.
        """
        self._logger = logging.getLogger(__name__)
        self._store_dir = store_dir
        self._ontology_release_service = validate_instance(
            ontology_release_service, OntologyReleaseService, 'ontology_release_service',
        )
        self._remote_ontology_service = validate_instance(
            remote_ontology_service, RemoteOntologyService, 'remote_ontology_service',
        )

    def load_minimal_ontology(
        self,
        ontology_type: OntologyType,
        release: typing.Optional[str] = None,
        **kwargs,
    ) -> MinimalOntology:
        """
        Load a `release` of a given `ontology_type` as a minimal ontology.

        :param ontology_type: the desired ontology type, see :class:`OntologyType` for a list of supported ontologies.
        :param release: a `str` with the ontology release tag or `None` if the latest ontology should be fetched.
        :param kwargs: key-value arguments passed to the low-level loader function (currently :func:`load_minimal_ontology`).
        :return: a minimal ontology.
        :raises ValueError: if `release` is `None` and the latest release tag cannot be retrieved.
        """
        return self._impl_load_ontology(
            load_minimal_ontology,
            ontology_type,
            release,
            **kwargs,
        )

    def load_ontology(
        self,
        ontology_type: OntologyType,
        release: typing.Optional[str] = None,
        **kwargs,
    ) -> Ontology:
        """
        Load a `release` of a given `ontology_type` as an ontology.

        :param ontology_type: the desired ontology type, see :class:`OntologyType` for a list of supported ontologies.
        :param release: a `str` with the ontology release tag or `None` if the latest ontology should be fetched.
        :param kwargs: key-value arguments passed to the low-level loader function (currently :func:`load_ontology`).
        :return: an ontology.
        :raises ValueError: if the `release` corresponds to a non-existing ontology release.
        """
        return self._impl_load_ontology(
            load_ontology,
            ontology_type,
            release,
            **kwargs,
        )

    @property
    def store_dir(self) -> str:
        """
        Get a `str` with the path to the directory where the ontology resources are stored.

        This is the `store_dir` value that was provided to the constructor.

        NOTE(review): the previous documentation stated that the directory is a platform specific
        absolute path (`$HOME/.hpo-toolkit` on UNIX and `$HOME/hpo-toolkit` on Windows) that is
        created if it does not exist. None of that is done by this property - confirm against
        the code that configures the store.
        """
        return self._store_dir

    def load_minimal_hpo(
        self,
        release: typing.Optional[str] = None,
    ) -> MinimalOntology:
        """
        A convenience method for loading a specific HPO release.

        :param release: an optional `str` with the desired HPO release (if `None`, the latest HPO will be provided).
        :return: a :class:`hpotk.MinimalOntology` with the HPO data.
        :raises ValueError: if the `release` corresponds to a non-existing HPO release.
        """
        return self.load_minimal_ontology(
            OntologyType.HPO,
            release=release,
            prefixes_of_interest={'HP'},
        )

    def load_hpo(
        self,
        release: typing.Optional[str] = None,
    ) -> Ontology:
        """
        A convenience method for loading a specific HPO release.

        :param release: an optional `str` with the desired HPO release (if `None`, the latest HPO will be provided).
        :return: a :class:`hpotk.Ontology` with the HPO data.
        :raises ValueError: if the `release` corresponds to a non-existing HPO release.
        """
        return self.load_ontology(
            OntologyType.HPO,
            release=release,
            prefixes_of_interest={'HP'},
        )

    def clear(
        self,
        ontology_type: typing.Optional[OntologyType] = None,
    ):
        """
        Clear all ontology resources or resources of selected `ontology_type`.

        Clearing resources that are not present in the store (e.g. an ontology that has never
        been downloaded) is a no-op.

        :param ontology_type: the ontology to be cleared or `None` if resources of *all* ontologies should be cleared.
        """
        if ontology_type is None:
            if not os.path.isdir(self._store_dir):
                # The store folder has not been created yet, there is nothing to clear.
                return
            to_delete = os.listdir(self._store_dir)
        else:
            # Keep the item relative to the store folder. The folder is prepended below,
            # joining it here as well would duplicate a relative `store_dir` in the path.
            to_delete = [ontology_type.identifier]

        for item in to_delete:
            full_path = os.path.join(self._store_dir, item)
            if os.path.isdir(full_path):
                shutil.rmtree(full_path)
            elif os.path.lexists(full_path):
                # A regular file or a (possibly dangling) symbolic link.
                os.remove(full_path)

    def resolve_store_path(
        self,
        ontology_type: OntologyType,
        release: typing.Optional[str] = None,
    ) -> str:
        """
        Resolve the path of the ontology resource (e.g. HPO `hp.json` file) within the ontology store.

        Note, the path points to the location of the ontology resource in the local filesystem.
        The path may point to a non-existing file, if the load function has not been run yet.

        **Example**

        >>> import hpotk
        >>> store = hpotk.configure_ontology_store()
        >>> store.resolve_store_path(hpotk.store.OntologyType.HPO, release='v2023-10-09')  # doctest: +SKIP
        '/home/user/.hpo-toolkit/HP/hp.v2023-10-09.json'

        :param ontology_type: the desired ontology type, see :class:`OntologyType` for a list of supported ontologies.
        :param release: an optional `str` with the desired ontology release (if `None`, the latest ontology will be provided).
        :return: a `str` with path to the ontology resource.
        :raises ValueError: if `release` is `None` and the latest release tag cannot be retrieved.
        """
        fdir_ontology = os.path.join(self._store_dir, ontology_type.identifier)
        if release is None:
            # Ask the release service for the latest release tag.
            release = self._fetch_latest_release_if_missing(ontology_type)
        return os.path.join(
            fdir_ontology, f"{ontology_type.identifier.lower()}.{release}.json"
        )

    def _fetch_latest_release_if_missing(
        self,
        ontology_type: OntologyType,
    ) -> str:
        """
        Retrieve the latest release tag of the given `ontology_type`.

        :param ontology_type: the ontology resource of interest
        :return: a `str` with the latest ontology tag
        :raises ValueError: if unable to retrieve the latest release tag from the ontology release service
        """
        # Fetch the latest release tag, assuming the lexicographic tag sort order.
        latest_tag = max(
            self._ontology_release_service.fetch_tags(ontology_type), default=None
        )
        if latest_tag is None:
            raise ValueError(f"Unable to retrieve the latest tag for {ontology_type}")
        return latest_tag

    def _impl_load_ontology(
        self,
        loader_func,
        ontology_type: OntologyType,
        release: typing.Optional[str] = None,
        **kwargs,
    ):
        """
        Load the ontology with `loader_func`, downloading the ontology file into the store if it is missing.

        :param loader_func: a function that takes the path to the ontology file and `kwargs` and returns the ontology.
        :param ontology_type: the desired ontology type.
        :param release: a `str` with the ontology release tag or `None` if the latest ontology should be used.
        :param kwargs: key-value arguments passed to `loader_func`.
        :return: the value returned by `loader_func`.
        :raises ValueError: if `release` is `None` and the latest release tag cannot be retrieved.
        """
        if release is None:
            release = self._fetch_latest_release_if_missing(ontology_type)
        fpath_ontology = self.resolve_store_path(ontology_type, release)

        # Download ontology if missing.
        if not os.path.isfile(fpath_ontology):
            self._download_ontology(ontology_type, release, fpath_ontology)

        # Load the ontology
        return loader_func(fpath_ontology, **kwargs)

    def _download_ontology(
        self,
        ontology_type: OntologyType,
        release: str,
        fpath_ontology: str,
    ):
        """
        Download the `release` of the `ontology_type` and store it at `fpath_ontology`.

        The data is written into a temporary `.part` file next to the destination and moved
        into place only after the download has finished. A failed download therefore never
        leaves a truncated file that a later load would mistake for a valid ontology.
        """
        os.makedirs(os.path.dirname(fpath_ontology), exist_ok=True)
        fpath_partial = f"{fpath_ontology}.part"
        try:
            with self._remote_ontology_service.fetch_ontology(
                ontology_type, release
            ) as response, open(fpath_partial, "wb") as fh_ontology:
                fh_ontology.write(response.read())
            # Both paths are in the same folder, so the move does not cross filesystems.
            os.replace(fpath_partial, fpath_ontology)
        finally:
            # The partial file exists at this point only if the download or the move failed.
            if os.path.exists(fpath_partial):
                os.remove(fpath_partial)
        self._logger.debug("Stored the ontology at %s", fpath_ontology)