Source code for dowhy.gcm.ml.regression
from abc import abstractmethod
from typing import Any
import numpy as np
import sklearn
from packaging import version
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
if version.parse(sklearn.__version__) < version.parse("1.0"):
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa

from sklearn.ensemble import (
    AdaBoostRegressor,
    ExtraTreesRegressor,
    HistGradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import ElasticNetCV, LassoCV, LassoLarsIC, LinearRegression, RidgeCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from dowhy.gcm.ml.prediction_model import PredictionModel
from dowhy.gcm.util.general import auto_apply_encoders, auto_fit_encoders, shape_into_2d
class SklearnRegressionModel(PredictionModel):
    """
    General wrapper class for sklearn models.
    """

    def __init__(self, sklearn_mdl: Any) -> None:
        self._sklearn_mdl = sklearn_mdl
        self._encoders = {}

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        self._encoders = auto_fit_encoders(X, Y)
        X = auto_apply_encoders(X, self._encoders)
        self._sklearn_mdl.fit(X=X, y=Y.squeeze())

    def predict(self, X: np.ndarray) -> np.ndarray:
        return shape_into_2d(self._sklearn_mdl.predict(auto_apply_encoders(X, self._encoders)))

    @property
    def sklearn_model(self) -> Any:
        return self._sklearn_mdl

    def clone(self):
        """
        Clones the prediction model using the same hyperparameters, but leaves it unfitted.

        :return: An unfitted clone of the prediction model.
        """
        return SklearnRegressionModel(sklearn_mdl=sklearn.clone(self._sklearn_mdl))

    def __str__(self):
        return str(self._sklearn_mdl)
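
# Example (illustrative sketch, not part of the module): wrapping an arbitrary sklearn
# estimator and running fit/predict on made-up data. Inputs are encoded automatically
# before fitting, and predict() always returns a 2D array of shape (n_samples, 1).
#
#   from sklearn.linear_model import LinearRegression
#   model = SklearnRegressionModel(LinearRegression())
#   model.fit(X=np.random.normal(size=(100, 2)), Y=np.random.normal(size=100))
#   y_hat = model.predict(np.random.normal(size=(10, 2)))  # shape (10, 1)
#   unfitted_copy = model.clone()
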
class LinearRegressionWithFixedParameter(PredictionModel):
    def __init__(self, coefficients: np.ndarray, intercept: float):
        self.coefficients = coefficients
        self.intercept = intercept

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        pass

    def predict(self, X: np.ndarray) -> np.ndarray:
        return (np.dot(shape_into_2d(X), self.coefficients) + self.intercept).reshape(-1, 1)

    def clone(self):
        return LinearRegressionWithFixedParameter(coefficients=self.coefficients, intercept=self.intercept)
def create_linear_regressor_with_given_parameters(
    coefficients: np.ndarray, intercept: float = 0
) -> LinearRegressionWithFixedParameter:
    return LinearRegressionWithFixedParameter(np.array(coefficients), intercept)
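
# Example (illustrative, hypothetical numbers): a model with user-given coefficients that
# fit() never changes; predict() simply computes X @ coefficients + intercept.
#
#   fixed_model = create_linear_regressor_with_given_parameters(coefficients=[2.0, -1.0], intercept=0.5)
#   fixed_model.fit(X=np.empty((0, 2)), Y=np.empty(0))  # no-op
#   fixed_model.predict(np.array([[1.0, 1.0]]))  # -> [[1.5]], i.e. 2*1 - 1*1 + 0.5
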
def create_linear_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(LinearRegression(**kwargs))


def create_ridge_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(RidgeCV(**kwargs))


def create_lasso_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(LassoCV(**kwargs))


def create_lasso_lars_ic_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(LassoLarsIC(**kwargs))


def create_elastic_net_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(ElasticNetCV(**kwargs))


def create_gaussian_process_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(GaussianProcessRegressor(**kwargs))


def create_support_vector_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(SVR(**kwargs))


def create_random_forest_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(RandomForestRegressor(**kwargs))


def create_hist_gradient_boost_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(HistGradientBoostingRegressor(**kwargs))


def create_knn_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(KNeighborsRegressor(**kwargs))


def create_ada_boost_regressor(**kwargs) -> SklearnRegressionModel:
    return SklearnRegressionModel(AdaBoostRegressor(**kwargs))
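
# Example (illustrative): each create_* factory forwards its keyword arguments to the
# underlying sklearn estimator, so hyperparameters can be set as usual.
#
#   rf_model = create_random_forest_regressor(n_estimators=100, max_depth=5)
#   knn_model = create_knn_regressor(n_neighbors=3)
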
def create_polynom_regressor(degree: int = 2, **kwargs_linear_model) -> SklearnRegressionModel:
    return SklearnRegressionModel(
        make_pipeline(PolynomialFeatures(degree=degree, include_bias=False), LinearRegression(**kwargs_linear_model))
    )
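
# Example (illustrative, toy data): the polynomial regressor expands the inputs with
# PolynomialFeatures before fitting a linear model, so with degree=2 it can capture a
# quadratic relationship such as y = x**2.
#
#   x = np.random.uniform(-1, 1, size=(500, 1))
#   poly_model = create_polynom_regressor(degree=2)
#   poly_model.fit(X=x, Y=x[:, 0] ** 2)
#   poly_model.predict(np.array([[0.5]]))  # approximately [[0.25]]
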
class InvertibleFunction:
    @abstractmethod
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """Applies the function on the input."""
        raise NotImplementedError

    @abstractmethod
    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """Returns the outcome of applying the inverse of the function on the inputs."""
        raise NotImplementedError
class InvertibleIdentityFunction(InvertibleFunction):
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        return X

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        return X


class InvertibleExponentialFunction(InvertibleFunction):
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        return np.exp(X)

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        return np.log(X)


class InvertibleLogarithmicFunction(InvertibleFunction):
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        return np.log(X)

    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        return np.exp(X)
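
# Example (illustrative): each InvertibleFunction pairs evaluate() with its exact inverse,
# so a round trip recovers the original values (up to floating-point error).
#
#   f = InvertibleExponentialFunction()
#   values = np.array([0.0, 1.0, 2.0])
#   np.allclose(f.evaluate_inverse(f.evaluate(values)), values)  # True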