Source code for pywhy_stats.api

from enum import Enum
from types import ModuleType
from typing import Optional
from warnings import warn

import numpy as np
import scipy.stats
from numpy.typing import ArrayLike

from pywhy_stats.independence import fisherz, kci

from .pvalue_result import PValueResult


class Methods(Enum):
    """Enumeration of the available independence testing methods.

    ``AUTO`` is a placeholder asking the API to pick a method itself; every
    other member carries, as its value, the module that implements the test.
    """

    #: Choose an automatic method based on the data.
    AUTO = 0

    #: :py:mod:`pywhy_stats.independence.fisherz`: Fisher's Z test for independence
    FISHERZ = fisherz

    #: :py:mod:`pywhy_stats.independence.kci`: Conditional kernel independence test
    KCI = kci


def independence_test(
    X: ArrayLike,
    Y: ArrayLike,
    condition_on: Optional[ArrayLike] = None,
    method=Methods.AUTO,
    **kwargs,
) -> PValueResult:
    """Perform a (conditional) independence test to determine whether X and Y are independent.

    The test may be conditioned on an optional set of variables. That is, test whether
    ``X _||_ Y | condition_on``, where the null hypothesis is that X and Y are independent.

    Parameters
    ----------
    X : ArrayLike, shape (n_samples, n_features_x)
        Data matrix for X.
    Y : ArrayLike, shape (n_samples, n_features_y)
        Data matrix for Y.
    condition_on : ArrayLike or None, shape (n_samples, n_features_z), optional
        Data matrix for the conditioning variables. If None is given, an unconditional test
        is performed.
    method : Methods, optional
        Independence test method from the :class:`pywhy_stats.Methods` enum. Default is
        `Methods.AUTO`, which will automatically select an appropriate method.
    **kwargs : dict or None, optional
        Additional keyword arguments to be passed to the specific test method

    Returns
    -------
    result : PValueResult
        An instance of the PValueResult data class, containing the p-value, test statistic,
        and any additional information related to the independence test.

    Raises
    ------
    ValueError
        If ``method`` is not a member of the :class:`pywhy_stats.Methods` enum.

    Warns
    -----
    UserWarning
        If the Fisher-Z test is selected and a normality test rejects Gaussianity
        (p-value below 0.05) for at least one column of ``X``, ``Y`` or ``condition_on``.

    See Also
    --------
    pywhy_stats.independence.fisherz : Fisher's Z test for independence
    pywhy_stats.independence.kci : Kernel Conditional Independence test
    """
    # Validate first so that arbitrary objects (e.g. arrays) are rejected before any
    # comparison against enum members is attempted.
    if not isinstance(method, Methods):
        raise ValueError(
            f"Invalid method type. Expected one of {Methods.__members__.keys()}, "
            f"but got {method}."
        )

    # AUTO currently always resolves to the kernel-based test.
    selected: Methods = Methods.KCI if method is Methods.AUTO else method

    if selected is Methods.FISHERZ:
        samples = [X, Y] if condition_on is None else [X, Y, condition_on]
        for sample in samples:
            res = scipy.stats.normaltest(sample, axis=0)

            # XXX: we should add pingouin as an optional dependency for doing multi-comp stuff
            # Compare each per-column p-value against the threshold *before* reducing with
            # any(); reducing first would compare a single bool against 0.05.
            if (np.atleast_1d(res.pvalue) < 0.05).any():
                warn(
                    "The provided data does not seem to be Gaussian, but the Fisher-Z test "
                    "assumes that the data follows a Gaussian distribution. The result should "
                    "be interpreted carefully or consider a different independence test method.",
                    stacklevel=2,
                )

    # Each non-AUTO member's value is the module implementing the test; dispatch to its
    # unconditional or conditional entry point.
    if condition_on is None:
        return selected.value.ind(X, Y, **kwargs)
    return selected.value.condind(X, Y, condition_on, **kwargs)