Source code for gecco.interpro
"""Simple data classes to expose embedded InterPro data.
"""
import gzip
import json
from dataclasses import dataclass, field, fields
from typing import Dict, List, Optional
try:
from importlib.resources import files
except ImportError:
from importlib_resources import files # type: ignore
__all__ = ["InterProEntry", "InterPro", "GeneOntologyTerm"]
@dataclass
class GOTerm:
"""A single term from the Gene Ontology.
"""
accession: str
name: str
namespace: str
def __eq__(self, other: object) -> bool:
if not isinstance(other, type(self)):
return NotImplemented
return (
self.accession == other.accession
and self.name == other.name
and self.namespace == other.namespace
)
def __hash__(self) -> int:
return hash((type(self), self.accession, self.name, self.namespace))
[docs]@dataclass
class InterProEntry:
"""A single entry in the InterPro database.
"""
accession: str
members: List[str]
name: str
databases: List[str]
type: str
go_terms: List[GOTerm]
go_functions: List[GOTerm]
[docs]@dataclass
class InterPro:
"""A subset of the InterPro database exposing domain metadata.
"""
entries: List[InterProEntry]
def __init__(self, entries: List[InterProEntry]):
self.entries = entries
self.by_accession = { member:entry for entry in entries for member in entry.members }
@classmethod
def load(cls) -> "InterPro":
with files(__name__).joinpath("interpro.json").open() as f:
data = json.load(f)
entries = []
for raw_entry in data:
# get terms corresponding to domain
go_terms = [
GOTerm(**t) for t in raw_entry.pop("go_terms")
]
go_functions = [
GOTerm(**t, namespace="molecular_function")
for t in raw_entry.pop("go_functions")
]
entries.append(
InterProEntry(**raw_entry, go_terms=go_terms, go_functions=go_functions)
)
return cls(entries)