Source code for dowhy.gcm.independence_test.kernel_operation
"""Functions in this module should be considered experimental, meaning there might be breaking API changes in the
future.
"""
from typing import Optional
import numpy as np
from sklearn.kernel_approximation import Nystroem
from sklearn.metrics import euclidean_distances
from dowhy.gcm.util.general import shape_into_2d
def apply_rbf_kernel(X: np.ndarray, precision: Optional[float] = None) -> np.ndarray:
    """Evaluate the RBF (Gaussian) kernel on every pair of samples in the input data.

    :param X: Input data.
    :param precision: Precision (scale) parameter of the RBF kernel. If None is given, it is inferred from the data
                      via ``_median_based_precision`` applied to the squared pairwise distances.
    :return: The kernel matrix ``exp(-precision * squared_distance)`` over all pairs of samples.
    """
    squared_distances = euclidean_distances(shape_into_2d(X), squared=True)

    # Only fall back to the data-driven heuristic when the caller did not supply a precision.
    gamma = _median_based_precision(squared_distances) if precision is None else precision

    return np.exp(-gamma * squared_distances)
def apply_rbf_kernel_with_adaptive_precision(X: np.ndarray) -> np.ndarray:
    """Estimates the RBF (Gaussian) kernel for the given input data. Here, each column is scaled by an individual
    precision parameter which is automatically inferred from the data.

    The result is the element-wise product of one RBF kernel matrix per column, where each per-column kernel uses
    the precision returned by ``_median_based_precision`` for the pairwise distances of that column alone.

    :param X: Input data.
    :return: The outcome of applying a RBF (Gaussian) kernel on the data.
    """
    X = shape_into_2d(X)

    result = np.ones((X.shape[0], X.shape[0]))
    for i in range(X.shape[1]):
        # Distances must be computed on column i only; using the full X here would apply the same (global)
        # precision in every iteration and merely raise the ordinary RBF kernel to the power of the column count.
        distance_matrix = euclidean_distances(X[:, i].reshape(-1, 1), squared=True)

        if not np.any(distance_matrix > 0):
            # A constant column has no positive distances, so the median heuristic is undefined (NaN). Its kernel
            # factor is exp(0) == 1 for every pair, hence it can be skipped without changing the product.
            continue

        result *= np.exp(-_median_based_precision(distance_matrix) * distance_matrix)

    return result
def apply_delta_kernel(X: np.ndarray) -> np.ndarray:
    """Applies the delta kernel, i.e. the kernel value is 1 if two entries are equal and 0 otherwise.

    :param X: Input data. After reshaping to 2D it must consist of a single column; otherwise the final reshape
              to an NxN matrix raises a ValueError.
    :return: The outcome of the delta-kernel, a binary NxN matrix of floats.
    """
    X = shape_into_2d(X)
    num_samples = X.shape[0]

    # Broadcast comparison of every row against every other row; shape is (N, N, number of columns).
    pairwise_equal = X[:, np.newaxis, :] == X[np.newaxis, :, :]

    # The builtin float is used because the np.float alias was removed in NumPy 1.24.
    return pairwise_equal.reshape(num_samples, num_samples).astype(float)
def approximate_rbf_kernel_features(
    X: np.ndarray, num_random_components: int, precision: Optional[float] = None
) -> np.ndarray:
    """Builds an approximated NxD (D << N) RBF kernel feature map with the Nystroem method, which uses a subset of
    the data. N is the number of samples in X and D the number of components.

    :param X: Input data.
    :param num_random_components: Number of components D for the approximated kernel map.
    :param precision: Precision (scale) parameter of the RBF kernel, passed to Nystroem as ``gamma``. If None is
                      given, it is inferred from the data via ``_median_based_precision``.
    :return: A NxD approximated RBF kernel map, where N is the number of samples in X and D the number of components.
    """
    samples = shape_into_2d(X)

    gamma = precision
    if gamma is None:
        # No precision supplied: derive it from the squared pairwise distances of the samples.
        gamma = _median_based_precision(euclidean_distances(samples, squared=True))

    feature_map = Nystroem(kernel="rbf", gamma=gamma, n_components=num_random_components)
    return feature_map.fit_transform(samples)
def approximate_delta_kernel_features(X: np.ndarray, num_random_components: int) -> np.ndarray:
    """Applies the Nystroem method to create a NxD (D << N) approximated delta kernel map using a subset of the data,
    where N is the number of samples in X and D the number of components. The delta kernel gives 1 if two entries are
    equal and 0 otherwise.

    :param X: Input data. It is not modified.
    :param num_random_components: Number of components D for the approximated kernel map.
    :return: A NxD approximated delta kernel map, where N is the number of samples in X and D the number of
             components. All non-zero entries are set to 1.
    """
    X = shape_into_2d(X)

    def delta_function(x, y) -> float:
        # np.all reduces the element-wise comparison to a single boolean before the float conversion.
        return float(np.all(x == y))

    # Encode every distinct value as an integer code. Relabelling X in place instead (X[X == value] = i) can merge
    # distinct categories when a freshly written code equals a not-yet-processed original value (e.g. values -1
    # and 0 would both end up as 1), and it may also overwrite the caller's array if X aliases it.
    _, codes = np.unique(X, return_inverse=True)
    # The shape of the inverse index for multi-dimensional input differs between NumPy versions; reshape explicitly.
    encoded = codes.reshape(X.shape)

    result = Nystroem(kernel=delta_function, n_components=num_random_components).fit_transform(encoded.astype(int))
    result[result != 0] = 1

    return result
def _median_based_precision(distances: np.ndarray) -> float:
    """Return the reciprocal of the median of the positive, square-rooted distances.

    Only entries on or above the main diagonal are considered; the strictly lower triangle is zeroed out and
    therefore removed by the positivity filter.

    :param distances: Matrix of pairwise distances; the square root is taken element-wise, so callers in this
                      module pass squared distances.
    :return: 1 / median of the positive entries of the upper triangle of sqrt(distances).
    """
    upper_triangle = np.triu(np.sqrt(distances))
    positive_distances = upper_triangle[upper_triangle > 0]

    return 1 / np.median(positive_distances)