Source code for anml.data.data_specs

"""
===================
Data Specifications
===================

Gives data specifications that are used in
:class:`~anml.data.data.Data`.

A :class:`~anml.data.data.Data` class can be subclassed
for use in applications that have other standard columns outside
of the three defaults (``col_obs``, ``col_obs_se`` and ``col_groups``).
"""

from dataclasses import dataclass
from typing import List
import pandas as pd

from anml.exceptions import ANMLError


class DataSpecCompatibilityError(ANMLError):
    """Signals that data specs do not fit the data frame they are used with.

    Raised when a column named by a spec is absent from the data frame, and
    when several specs that must agree with each other do not.
    """
@dataclass
class DataSpecs:
    """Names of the standard data-frame columns a data set relies on.

    Only ``col_obs`` is required. ``col_obs_se`` and ``col_groups`` default
    to ``None``, which marks them as unused: unused entries are left out of
    ``_col_attributes`` / ``_data_attributes`` and therefore are not checked
    by ``_validate_df``.

    Parameters
    ----------
    col_obs
        A column name (judging by the field name, the observations).
    col_obs_se
        An optional column name (presumably the observation standard
        errors -- confirm against the callers).
    col_groups
        An optional list of column names (presumably grouping columns --
        confirm against the callers).
    """

    col_obs: str
    col_obs_se: str = None
    col_groups: List[str] = None

    def __post_init__(self):
        """Do nothing; presumably kept as an override point for subclasses."""

    @property
    def _attrs(self):
        """Instance attributes as a ``{attribute name: value}`` mapping."""
        return vars(self)

    @property
    def _col_attributes(self):
        """Names of the attributes whose value is not ``None``."""
        return [name for name, value in self._attrs.items() if value is not None]

    @property
    def _data_attributes(self):
        """Values of the attributes that are not ``None``."""
        return [value for value in self._attrs.values() if value is not None]

    def _validate_df(self, df: pd.DataFrame):
        """Check that every column named by these specs exists in ``df``.

        Parameters
        ----------
        df
            A pandas.DataFrame to be validated with these specifications.

        Raises
        ------
        DataSpecCompatibilityError
            On the first specified column that is not in ``df.columns``.
        """
        for spec_value in self._data_attributes:
            # ``_data_attributes`` already drops ``None``; the guard is kept
            # so the behaviour holds even if that property is overridden.
            if spec_value is None:
                continue
            # A single column name is checked like a one-element list.
            required = [spec_value] if isinstance(spec_value, str) else spec_value
            for col in required:
                if col in df.columns:
                    continue
                raise DataSpecCompatibilityError(
                    f"{col} is not in data columns: {df.columns}"
                )
def _check_compatible_specs(specs: List["DataSpecs"]):
    """Verify that every data spec in ``specs`` is compatible with the first.

    Specs are compatible when they populate (set to a non-``None`` value)
    the same set of attributes as the first spec, and when each of those
    attributes holds a value that is an instance of the type the first spec
    uses for it.

    Parameters
    ----------
    specs
        The data specs to compare. An empty list is trivially compatible.

    Raises
    ------
    DataSpecCompatibilityError
        If a spec populates a different set of attributes than the first
        spec, or if one of its populated attributes is not an instance of
        the type used by the first spec for that attribute.
    """
    if not specs:
        # Nothing to compare; avoids an IndexError on ``specs[0]``.
        return

    first_spec, *other_specs = specs
    expected_attributes = sorted(first_spec._col_attributes)

    # Specs are numbered from 1 in messages, so the first "other" spec is 2.
    for position, spec in enumerate(other_specs, start=2):
        if sorted(spec._col_attributes) != expected_attributes:
            raise DataSpecCompatibilityError(
                "At least one data spec is different. "
                f"Columns in spec 1 are {first_spec._col_attributes}. "
                f"Columns in spec {position} are {spec._col_attributes}."
            )

    for attribute in first_spec._col_attributes:
        attr_type = type(getattr(first_spec, attribute))
        for spec in other_specs:
            if not isinstance(getattr(spec, attribute), attr_type):
                raise DataSpecCompatibilityError(
                    "At least one data spec type is different. "
                    f"The attribute {attribute} should be of type {attr_type}."
                )