# Source code for dowhy.gcm.ml.regression

from abc import abstractmethod
from typing import Any

import numpy as np
import sklearn
from packaging import version
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

if version.parse(sklearn.__version__) < version.parse("1.0"):
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa

from sklearn.ensemble import (
    AdaBoostRegressor,
    ExtraTreesRegressor,
    HistGradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import ElasticNetCV, LassoCV, LassoLarsIC, LinearRegression, RidgeCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

from dowhy.gcm.ml.prediction_model import PredictionModel
from dowhy.gcm.util.general import auto_apply_encoders, auto_fit_encoders, shape_into_2d


class SklearnRegressionModel(PredictionModel):
    """
    General wrapper class for sklearn models.

    The wrapped estimator is trained and queried on inputs that were first passed through the encoders
    returned by ``auto_fit_encoders``; the encoders are (re-)created on every call to :meth:`fit` and
    re-applied to the inputs in :meth:`predict`.
    """

    def __init__(self, sklearn_mdl: Any) -> None:
        """
        :param sklearn_mdl: The estimator to wrap. It needs to provide ``fit(X=..., y=...)`` and ``predict(...)``
                            and has to be accepted by ``sklearn.clone`` for :meth:`clone` to work.
        """
        self._sklearn_mdl = sklearn_mdl
        # Replaced by the fitted encoders in fit(); empty until then.
        self._encoders = {}

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """
        Fits the encoders on the given data and then the wrapped estimator on the encoded inputs.

        :param X: Input samples.
        :param Y: Target samples. Axes of length one are squeezed away before the data is handed to the estimator.
        """
        self._encoders = auto_fit_encoders(X, Y)
        X = auto_apply_encoders(X, self._encoders)

        target = Y.squeeze()
        if np.ndim(target) == 0:
            # A single-sample target such as shape (1, 1) squeezes down to a 0-d value. Estimators expect the
            # target to have one entry per sample, so restore the sample axis.
            target = np.reshape(target, (1,))

        self._sklearn_mdl.fit(X=X, y=target)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Applies the stored encoders to the inputs and returns the estimator's predictions.

        :param X: Input samples.
        :return: The predictions of the wrapped estimator after being passed through ``shape_into_2d``.
        """
        return shape_into_2d(self._sklearn_mdl.predict(auto_apply_encoders(X, self._encoders)))

    @property
    def sklearn_model(self) -> Any:
        """The wrapped estimator object itself (not a copy)."""
        return self._sklearn_mdl

    def clone(self):
        """
        Clones the prediction model using the same hyper parameters but not fitted.

        The result is always a plain :class:`SklearnRegressionModel` wrapping ``sklearn.clone`` of the estimator;
        the encoders of this instance are not carried over.

        :return: An unfitted clone of the prediction model.
        """
        return SklearnRegressionModel(sklearn_mdl=sklearn.clone(self._sklearn_mdl))

    def __str__(self):
        """Returns the string representation of the wrapped estimator."""
        return str(self._sklearn_mdl)
class LinearRegressionWithFixedParameter(PredictionModel):
    """
    Linear prediction model whose coefficients and intercept are supplied by the caller instead of being
    estimated from data.
    """

    def __init__(self, coefficients: np.ndarray, intercept: float):
        """
        :param coefficients: Coefficients that are multiplied (via ``np.dot``) with the inputs in :meth:`predict`.
        :param intercept: Constant that is added to the product.
        """
        self.intercept = intercept
        self.coefficients = coefficients

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Does nothing; the parameters are fixed, so both arguments are ignored."""
        return None

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Evaluates the linear function on the given inputs.

        :param X: Input samples; they are passed through ``shape_into_2d`` before the product is computed.
        :return: ``X . coefficients + intercept`` reshaped into a single column, i.e. shape (-1, 1).
        """
        inputs_2d = shape_into_2d(X)
        weighted_sum = np.dot(inputs_2d, self.coefficients)
        shifted = weighted_sum + self.intercept
        return shifted.reshape(-1, 1)

    def clone(self):
        """
        Creates a new model with the same parameters.

        :return: A new instance that references the same ``coefficients`` and ``intercept`` objects (no copy).
        """
        return LinearRegressionWithFixedParameter(intercept=self.intercept, coefficients=self.coefficients)
def create_linear_regressor_with_given_parameters(
    coefficients: np.ndarray, intercept: float = 0
) -> LinearRegressionWithFixedParameter:
    """
    Creates a linear model with caller-provided parameters.

    :param coefficients: Coefficients of the linear model; converted with ``np.array`` before being stored.
    :param intercept: Intercept of the linear model. Defaults to 0.
    :return: A :class:`LinearRegressionWithFixedParameter` holding the given parameters.
    """
    coefficient_array = np.array(coefficients)
    return LinearRegressionWithFixedParameter(coefficient_array, intercept)
def create_linear_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``LinearRegression``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``LinearRegression`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = LinearRegression(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_ridge_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``RidgeCV``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``RidgeCV`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = RidgeCV(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_lasso_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``LassoCV``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``LassoCV`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = LassoCV(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_lasso_lars_ic_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``LassoLarsIC``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``LassoLarsIC`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = LassoLarsIC(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_elastic_net_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``ElasticNetCV``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``ElasticNetCV`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = ElasticNetCV(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_gaussian_process_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``GaussianProcessRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``GaussianProcessRegressor`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = GaussianProcessRegressor(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_support_vector_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``SVR``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``SVR`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = SVR(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_random_forest_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``RandomForestRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``RandomForestRegressor`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = RandomForestRegressor(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_hist_gradient_boost_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``HistGradientBoostingRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``HistGradientBoostingRegressor`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = HistGradientBoostingRegressor(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_extra_trees_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``ExtraTreesRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``ExtraTreesRegressor`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = ExtraTreesRegressor(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_knn_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``KNeighborsRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``KNeighborsRegressor`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = KNeighborsRegressor(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_ada_boost_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around sklearn's ``AdaBoostRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``AdaBoostRegressor`` constructor.
    :return: The wrapped estimator; ``fit`` has not been called on it.
    """
    estimator = AdaBoostRegressor(**kwargs)
    return SklearnRegressionModel(sklearn_mdl=estimator)
def create_polynom_regressor(degree: int = 2, **kwargs_linear_model) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` around a pipeline of ``PolynomialFeatures`` followed by
    ``LinearRegression``.

    :param degree: Value passed as ``degree`` to ``PolynomialFeatures``. Defaults to 2.
    :param kwargs_linear_model: Keyword arguments forwarded unchanged to the ``LinearRegression`` constructor.
    :return: The wrapped pipeline; ``fit`` has not been called on it.
    """
    # include_bias=False: no constant column is added by the feature expansion step.
    feature_expansion = PolynomialFeatures(degree=degree, include_bias=False)
    linear_model = LinearRegression(**kwargs_linear_model)
    pipeline = make_pipeline(feature_expansion, linear_model)
    return SklearnRegressionModel(sklearn_mdl=pipeline)
class InvertibleFunction:
    """
    Interface for a function that can be evaluated in both directions.

    Note that the class does not use ``ABCMeta``, so the ``abstractmethod`` markers are not enforced at
    instantiation time; calling an unimplemented method raises ``NotImplementedError`` instead.
    """

    @abstractmethod
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """
        Applies the function on the input.

        :param X: Values the function is applied to.
        :return: The function values.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError()

    @abstractmethod
    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """
        Returns the outcome of applying the inverse of the function on the inputs.

        :param X: Values the inverse function is applied to.
        :return: The values of the inverse function.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError()
class InvertibleIdentityFunction(InvertibleFunction):
    """Identity mapping: both directions hand the input back unchanged."""

    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """
        :param X: Input values.
        :return: The very same object that was passed in (no copy is made).
        """
        return X

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """
        :param X: Input values.
        :return: The very same object that was passed in (no copy is made).
        """
        return X
class InvertibleExponentialFunction(InvertibleFunction):
    """Exponential function with the natural logarithm as its inverse."""

    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """
        :param X: Input values.
        :return: ``np.exp`` applied to the input.
        """
        exponentiated = np.exp(X)
        return exponentiated

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """
        :param X: Input values.
        :return: ``np.log`` applied to the input.
        """
        logarithm = np.log(X)
        return logarithm
class InvertibleLogarithmicFunction(InvertibleFunction):
    """Natural logarithm with the exponential function as its inverse."""

    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """
        :param X: Input values.
        :return: ``np.log`` applied to the input.
        """
        logarithm = np.log(X)
        return logarithm

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """
        :param X: Input values.
        :return: ``np.exp`` applied to the input.
        """
        exponentiated = np.exp(X)
        return exponentiated