# Source code for dowhy.gcm.ml.regression

from abc import abstractmethod
from typing import Any

import numpy as np
import sklearn
from packaging import version
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

if version.parse(sklearn.__version__) < version.parse("1.0"):
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa

from sklearn.ensemble import (
    AdaBoostRegressor,
    ExtraTreesRegressor,
    HistGradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import ElasticNetCV, LassoCV, LassoLarsIC, LinearRegression, RidgeCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

from dowhy.gcm.ml.prediction_model import PredictionModel
from dowhy.gcm.util.general import auto_apply_encoders, auto_fit_encoders, shape_into_2d


class SklearnRegressionModel(PredictionModel):
    """
    General wrapper class for sklearn models.

    The wrapped estimator is fitted on inputs that were passed through the encoders returned by
    ``auto_fit_encoders``; the same encoders are applied again before every prediction.
    """

    def __init__(self, sklearn_mdl: Any) -> None:
        """
        :param sklearn_mdl: The estimator to wrap. It is used through its ``fit`` and ``predict`` methods and is
                            copied with ``sklearn.clone`` when this wrapper is cloned.
        """
        self._sklearn_mdl = sklearn_mdl
        # Replaced by the fitted encoders on every call to fit().
        self._encoders = {}

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """
        Fits the encoders on the given data and then the wrapped estimator on the encoded inputs.

        :param X: Input samples.
        :param Y: Target samples. Singleton dimensions are squeezed away before fitting.
        """
        self._encoders = auto_fit_encoders(X, Y)
        X = auto_apply_encoders(X, self._encoders)

        y = Y.squeeze()
        if np.ndim(y) == 0:
            # A single sample with a single target collapses to a 0-d value when squeezed; restore the
            # sample axis so the estimator still receives a collection of one target.
            y = np.reshape(y, (1,))

        self._sklearn_mdl.fit(X=X, y=y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predicts the targets for the given inputs using the encoders obtained in :meth:`fit`.

        :param X: Input samples.
        :return: The predictions of the wrapped estimator after passing them through ``shape_into_2d``.
        """
        return shape_into_2d(self._sklearn_mdl.predict(auto_apply_encoders(X, self._encoders)))

    @property
    def sklearn_model(self) -> Any:
        """The wrapped estimator instance."""
        return self._sklearn_mdl

    def clone(self):
        """
        Clones the prediction model using the same hyper parameters but not fitted.

        :return: An unfitted clone of the prediction model.
        """
        return SklearnRegressionModel(sklearn_mdl=sklearn.clone(self._sklearn_mdl))

    def __str__(self):
        """Returns the string representation of the wrapped estimator."""
        return str(self._sklearn_mdl)


class LinearRegressionWithFixedParameter(PredictionModel):
    """
    Linear model whose coefficients and intercept are given at construction time instead of being learned.
    """

    def __init__(self, coefficients: np.ndarray, intercept: float):
        """
        :param coefficients: Coefficients that are multiplied (via ``np.dot``) with the inputs.
        :param intercept: Constant offset added to the weighted inputs.
        """
        self.coefficients = coefficients
        self.intercept = intercept

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Does nothing; the parameters are fixed and never estimated from data."""
        pass

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Evaluates the linear model on the given inputs.

        :param X: Input samples; passed through ``shape_into_2d`` before the dot product.
        :return: ``np.dot(X, coefficients) + intercept`` reshaped into a single column.
        """
        return (np.dot(shape_into_2d(X), self.coefficients) + self.intercept).reshape(-1, 1)

    def clone(self):
        """
        Creates a new model with the same parameters.

        :return: A new instance referencing the same coefficients object and intercept (no copy is made).
        """
        return LinearRegressionWithFixedParameter(coefficients=self.coefficients, intercept=self.intercept)


def create_linear_regressor_with_given_parameters(
    coefficients: np.ndarray, intercept: float = 0
) -> LinearRegressionWithFixedParameter:
    """
    Creates a linear model with fixed, user-provided parameters.

    :param coefficients: Coefficients of the linear model; converted with ``np.array`` before being stored.
    :param intercept: Constant offset of the linear model. Defaults to 0.
    :return: A :class:`LinearRegressionWithFixedParameter` using the given parameters.
    """
    return LinearRegressionWithFixedParameter(np.array(coefficients), intercept)


def create_linear_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``LinearRegression``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``LinearRegression`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(LinearRegression(**kwargs))


def create_ridge_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``RidgeCV``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``RidgeCV`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(RidgeCV(**kwargs))


def create_lasso_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``LassoCV``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``LassoCV`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(LassoCV(**kwargs))


def create_lasso_lars_ic_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``LassoLarsIC``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``LassoLarsIC`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(LassoLarsIC(**kwargs))


def create_elastic_net_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``ElasticNetCV``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``ElasticNetCV`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(ElasticNetCV(**kwargs))


def create_gaussian_process_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``GaussianProcessRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``GaussianProcessRegressor`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(GaussianProcessRegressor(**kwargs))


def create_support_vector_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``SVR``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``SVR`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(SVR(**kwargs))


def create_random_forest_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``RandomForestRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``RandomForestRegressor`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(RandomForestRegressor(**kwargs))


def create_hist_gradient_boost_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``HistGradientBoostingRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``HistGradientBoostingRegressor`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(HistGradientBoostingRegressor(**kwargs))


def create_extra_trees_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``ExtraTreesRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``ExtraTreesRegressor`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(ExtraTreesRegressor(**kwargs))


def create_knn_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``KNeighborsRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``KNeighborsRegressor`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(KNeighborsRegressor(**kwargs))


def create_ada_boost_regressor(**kwargs) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping sklearn's ``AdaBoostRegressor``.

    :param kwargs: Keyword arguments forwarded unchanged to the ``AdaBoostRegressor`` constructor.
    :return: The wrapped, not yet fitted regression model.
    """
    return SklearnRegressionModel(AdaBoostRegressor(**kwargs))


def create_polynom_regressor(degree: int = 2, **kwargs_linear_model) -> SklearnRegressionModel:
    """
    Creates a :class:`SklearnRegressionModel` wrapping a polynomial regression pipeline.

    The pipeline first expands the inputs with sklearn's ``PolynomialFeatures`` (without the bias column) and then
    fits a ``LinearRegression`` on the expanded features.

    :param degree: Degree passed to ``PolynomialFeatures``. Defaults to 2.
    :param kwargs_linear_model: Keyword arguments forwarded unchanged to the ``LinearRegression`` constructor.
    :return: The wrapped, not yet fitted regression pipeline.
    """
    return SklearnRegressionModel(
        make_pipeline(PolynomialFeatures(degree=degree, include_bias=False), LinearRegression(**kwargs_linear_model))
    )


class InvertibleFunction:
    """
    Interface of a function that can be evaluated in both directions.

    Note that the class does not use an ABC metaclass, so the ``abstractmethod`` markers are not enforced at
    instantiation time; calling a method that was not overridden raises ``NotImplementedError`` instead.
    """

    @abstractmethod
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """Applies the function on the input."""
        raise NotImplementedError

    @abstractmethod
    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """Returns the outcome of applying the inverse of the function on the inputs."""
        raise NotImplementedError


class InvertibleIdentityFunction(InvertibleFunction):
    """Identity function; both directions return the input unchanged."""

    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """Returns the given input as is (the same object, no copy)."""
        return X

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """Returns the given input as is (the same object, no copy)."""
        return X


class InvertibleExponentialFunction(InvertibleFunction):
    """Element-wise exponential function with the natural logarithm as its inverse."""

    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """Returns ``np.exp(X)``."""
        return np.exp(X)

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """Returns ``np.log(X)``; no check is made that the inputs are positive."""
        return np.log(X)


class InvertibleLogarithmicFunction(InvertibleFunction):
    """Element-wise natural logarithm with the exponential function as its inverse."""

    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """Returns ``np.log(X)``; no check is made that the inputs are positive."""
        return np.log(X)

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """Returns ``np.exp(X)``."""
        return np.exp(X)