import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import Lasso, LinearRegression, Ridge, RidgeCV, SGDRegressor
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from dowhy.causal_refuters.reisz import get_generic_regressor
from dowhy.utils.regression import get_numeric_features
[docs]class PartialLinearSensitivityAnalyzer:
"""
Class to perform sensitivity analysis for partially linear model.
An efficient version of the non parametric sensitivity analyzer that works for estimators that return residuals of regression from confounders on treatment and outcome, such as the DML method. For all other methods (or when the partially linear assumption is not guaranteed to be satisfied), use the non-parametric sensitivity analysis.
Based on this work:
Chernozhukov, V., Cinelli, C., Newey, W., Sharma, A., & Syrgkanis, V. (2022). Long Story Short: Omitted Variable Bias in Causal Machine Learning (No. w30302). National Bureau of Economic Research.
:param estimator: estimator of the causal model
:param num_splits: number of splits for cross validation. (default = 5)
:param shuffle_data : shuffle data or not before splitting into folds (default = False)
:param shuffle_random_seed: seed for randomly shuffling data
:param effect_strength_treatment: C^2_T, list of plausible sensitivity parameters for effect of confounder on treatment
:param effect_strength_outcome: C^2_Y, list of plausible sensitivity parameters for effect of confounder on outcome
:param benchmark_common_causes: names of variables for bounding strength of confounders
:param significance_level: confidence interval for statistical inference(default = 0.05)
:param frac_strength_treatment: strength of association between unobserved confounder and treatment compared to benchmark covariate
:param frac_strength_outcome: strength of association between unobserved confounder and outcome compared to benchmark covariate
:param g_s_estimator_list: list of estimator objects for finding g_s. These objects should have fit() and predict() functions.
:param g_s_estimator_param_list: list of dictionaries with parameters for tuning respective estimators in "g_s_estimator_list".
:param alpha_s_estimator_list: list of estimator objects for finding the treatment predictor which is used for alpha_s estimation. These objects should have fit() and predict_proba() functions.
:param alpha_s_estimator_param_list: list of dictionaries with parameters for tuning respective estimators in "alpha_s_estimator_list".
The order of the dictionaries in the list should be consistent with the estimator objects order in "g_s_estimator_list"
:param observed_common_causes: common causes dataframe
:param outcome: outcome dataframe
:param treatment: treatment dataframe
"""
def __init__(
self,
estimator=None,
num_splits=5,
shuffle_data=False,
shuffle_random_seed=None,
reisz_polynomial_max_degree=3,
significance_level=0.05,
effect_strength_treatment=None,
effect_strength_outcome=None,
benchmark_common_causes=None,
frac_strength_treatment=None,
frac_strength_outcome=None,
observed_common_causes=None,
treatment=None,
outcome=None,
g_s_estimator_list=None,
alpha_s_estimator_list=None,
g_s_estimator_param_list=None,
alpha_s_estimator_param_list=None,
**kwargs,
):
self.estimator = estimator
self.num_splits = num_splits
self.shuffle_data = shuffle_data
self.shuffle_random_seed = shuffle_random_seed
self.reisz_polynomial_max_degree = reisz_polynomial_max_degree
self.effect_strength_treatment = effect_strength_treatment
self.effect_strength_outcome = effect_strength_outcome
self.g_s_estimator_list = g_s_estimator_list
self.g_s_estimator_param_list = g_s_estimator_param_list
self.alpha_s_estimator_list = alpha_s_estimator_list
self.alpha_s_estimator_param_list = alpha_s_estimator_param_list
self.significance_level = significance_level
self.observed_common_causes = observed_common_causes
self.treatment = treatment
self.outcome = outcome
self.benchmark_common_causes = benchmark_common_causes
self.frac_strength_outcome = frac_strength_outcome
self.frac_strength_treatment = frac_strength_treatment
# whether the DGP is assumed to be partially linear
self.is_partial_linear = True
self.RV = None
self.RV_alpha = None
self.point_estimate = None
self.standard_error = None
self.theta_s = None
self.nu_2 = None
self.sigma_2 = None
self.S2 = None
self.S = None
self.neyman_orthogonal_score_outcome = None
self.neyman_orthogonal_score_treatment = None
self.neyman_orthogonal_score_theta = None
self.r2t_w = 0 # Partial R^2 of treatment with observed common causes
self.r2y_tw = 0 # Partial R^2 of outcome with treatment and observed common causes
self.results = None
self.num_points_per_contour = 30
self.benchmarking = self.is_benchmarking_needed()
self.logger = logging.getLogger(__name__)
[docs] def is_benchmarking_needed(self):
# can change this to allow default values that are same as the other parameter
if self.effect_strength_treatment is not None:
if self.effect_strength_outcome is None:
raise ValueError(
"Need to specify both partial_r2_confounder_treatment and partial_r2_confounder_outcome."
)
else:
if self.effect_strength_outcome is not None:
raise ValueError(
"Need to specify both partial_r2_confounder_treatment and partial_r2_confounder_outcome."
)
if self.benchmark_common_causes is not None:
if self.frac_strength_outcome is not None or self.frac_strength_treatment is not None:
return True
else:
raise ValueError(
"Need to specify at least one of effect_fraction_on_treatment or effect_fraction_on_outcome."
)
else:
return False
[docs] def get_phi_lower_upper(self, Cg, Calpha):
"""
Calculate lower and upper influence function (phi)
:param Cg: measure of strength of confounding that omitted variables generate in outcome regression
:param Calpha: measure of strength of confounding that omitted variables generate in treatment regression
:returns : lower bound of phi, upper bound of phi
"""
phi_lower = self.neyman_orthogonal_score_theta - ((Cg * Calpha) / (2 * self.S)) * (
-(self.sigma_2 / (self.nu_2**2)) * self.neyman_orthogonal_score_treatment
+ (1 / self.nu_2) * self.neyman_orthogonal_score_outcome
)
phi_upper = self.neyman_orthogonal_score_theta + ((Cg * Calpha) / (2 * self.S)) * (
-(self.sigma_2 / (self.nu_2**2)) * self.neyman_orthogonal_score_treatment
+ (1 / self.nu_2) * self.neyman_orthogonal_score_outcome
)
return phi_lower, phi_upper
[docs] def get_confidence_levels(self, r2yu_tw, r2tu_w, significance_level, is_partial_linear):
"""
Returns lower and upper bounds for the effect estimate, given different explanatory powers of unobserved confounders. It uses the following definitions.
Y_residual = Y - E[Y | X, T] (residualized outcome)
T_residual = T - E[T | X] (residualized treatment)
theta = E[(Y - E[Y | X, T)(T - E[T | X] )] / E[(T - E[T | X]) ^ 2]
σ² = E[(Y - E[Y | X, T]) ^ 2] (expected value of residual outcome)
ν^2 = E[(T - E[T | X])^2] (expected value of residual treatment)
ψ_θ = m(Ws , g) + (Y - g(Ws))α(Ws) - θ
ψ_σ² = (Y - g(Ws)) ^ 2 - σ²
ψ_ν2 = (2m(Ws, α ) - α^2) - ν^2
:param r2yu_tw: proportion of residual variance in the outcome explained by confounders
:param r2tu_w: proportion of residual variance in the treatment explained by confounders
:param significance_level: confidence interval for statistical inference(default = 0.05)
:param is_partial_linear: whether the data-generating process is assumed to be partially linear
:returns lower_confidence_bound: lower limit of confidence bound of the estimate
:returns upper_confidence_bound: upper limit of confidence bound of the estimate
:returns bias: omitted variable bias for the confounding scenario
"""
Cg2 = r2yu_tw # Strength of confounding that omitted variables generate in outcome regression
# Strength of confounding that omitted variables generate in treatment regression
Calpha2 = r2tu_w / (1 - r2tu_w)
Cg = np.sqrt(Cg2)
Calpha = np.sqrt(Calpha2)
self.S = np.sqrt(self.S2)
# computing the point estimate for the bounds
bound = self.S2 * Cg2 * Calpha2
bias = np.sqrt(bound)
theta_lower = self.theta_s - bias
theta_upper = self.theta_s + bias
if significance_level is not None:
phi_lower, phi_upper = self.get_phi_lower_upper(Cg=Cg, Calpha=Calpha)
expected_phi_lower = np.mean(phi_lower * phi_lower)
expected_phi_upper = np.mean(phi_upper * phi_upper)
n1 = phi_lower.shape[0]
n2 = phi_upper.shape[0]
stddev_lower = np.sqrt(expected_phi_lower / n1)
stddev_upper = np.sqrt(expected_phi_upper / n2)
probability = scipy.stats.norm.ppf(1 - significance_level)
lower_confidence_bound = theta_lower - probability * np.sqrt(
np.mean(stddev_lower * stddev_lower) + np.var(theta_lower)
)
upper_confidence_bound = theta_upper + probability * np.sqrt(
np.mean(stddev_upper * stddev_upper) + np.var(theta_upper)
)
else:
lower_confidence_bound = theta_lower
upper_confidence_bound = theta_upper
return lower_confidence_bound, upper_confidence_bound, bias
[docs] def calculate_robustness_value(self, alpha, is_partial_linear):
"""
Function to compute the robustness value of estimate against the confounders
:param alpha: confidence interval for statistical inference
:returns: robustness value
"""
for t_val in np.arange(0, 1, 0.01):
lower_confidence_bound, _, _ = self.get_confidence_levels(
r2yu_tw=t_val, r2tu_w=t_val, significance_level=alpha, is_partial_linear=is_partial_linear
)
if lower_confidence_bound <= 0:
return t_val
return t_val
[docs] def get_regression_r2(self, X, Y, numeric_features, split_indices, regression_model=None):
"""
Calculates the pearson non parametric partial R^2 from a regression function.
:param X: numpy array containing set of regressors
:param Y: outcome variable in regression
:param numeric_features: list of indices of columns with numeric features
:param split_indices: training and testing data indices obtained after cross folding
:returns: partial R^2 value
"""
if regression_model is None:
regression_model = get_generic_regressor(
cv=split_indices,
X=X,
Y=Y,
max_degree=self.reisz_polynomial_max_degree,
estimator_list=self.g_s_estimator_list,
estimator_param_list=self.g_s_estimator_param_list,
numeric_features=numeric_features,
)
num_samples = X.shape[0]
regression_pred = np.zeros(num_samples)
for train, test in split_indices:
reg_fn_fit = regression_model.fit(X[train], Y[train])
regression_pred[test] = reg_fn_fit.predict(X[test])
r2 = np.var(regression_pred) / np.var(Y)
return r2
[docs] def compute_r2diff_benchmarking_covariates(
self,
treatment_df,
features,
T,
Y,
W,
benchmark_common_causes,
split_indices=None,
second_stage_linear=False,
is_partial_linear=True,
):
"""
Computes the change in partial R^2 due to presence of unobserved confounders
:param split_indices: training and testing data indices obtained after cross folding
:param second_stage_linear: True if second stage regression is linear else False (default = False)
:param is_partial_linear: True if the data-generating process is assumed to be partially linear
:returns delta_r2_y_wj: observed additive gains in explanatory power with outcome when including benchmark covariate on regression equation
:returns delta_r2t_wj: observed additive gains in explanatory power with treatment when including benchmark covariate on regression equation
"""
T = T.ravel()
Y = Y.ravel()
num_samples = W.shape[0]
# common causes after removing the benchmark causes
W_j_df = features.drop(benchmark_common_causes, axis=1)
numeric_features = get_numeric_features(X=W_j_df)
W_j = W_j_df.to_numpy()
# dataframe with treatment and observed common causes after removing benchmark causes
T_W_j_df = pd.concat([treatment_df, W_j_df], axis=1)
numeric_features_t = get_numeric_features(X=T_W_j_df)
T_W_j = T_W_j_df.to_numpy()
# R^2 of treatment with observed common causes removing benchmark causes
if is_partial_linear:
r2t_w_j = self.get_regression_r2(X=W_j, Y=T, numeric_features=numeric_features, split_indices=split_indices)
delta_r2t_wj = self.r2t_w - r2t_w_j
else: # non parametric DGP
# return the variance of alpha_s
var_alpha_wj = self.get_alpharegression_var(
X=T_W_j,
numeric_features=numeric_features, # using numeric_features because the model only uses W
split_indices=split_indices,
)
delta_r2t_wj = var_alpha_wj
reg_function = None
if second_stage_linear is True:
reg_function = get_generic_regressor(
cv=split_indices,
X=T_W_j,
Y=Y,
max_degree=self.reisz_polynomial_max_degree,
estimator_list=[
LinearRegression(),
Pipeline(
[
(
"scale",
ColumnTransformer(
[("num", StandardScaler(), numeric_features)], remainder="passthrough"
),
),
("lasso_model", Lasso()),
]
),
SGDRegressor(alpha=0.001),
Ridge(),
RidgeCV(cv=5),
],
estimator_param_list=[
{"fit_intercept": [True, False]},
{"lasso_model__alpha": [0.01, 0.001, 1e-4, 1e-5, 1e-6]},
{"alpha": [0.0001, 1e-5, 0.01]},
{"alpha": [0.0001, 1e-5, 0.01, 1, 2]},
{"cv": [2, 3, 4]},
],
numeric_features=numeric_features,
) # Regressing over observed common causes removing benchmark causes and treatment
# R^2 of outcome with observed common causes and treatment after removing benchmark causes
r2y_tw_j = self.get_regression_r2(
X=T_W_j,
Y=Y,
numeric_features=numeric_features_t,
split_indices=split_indices,
regression_model=reg_function,
)
delta_r2_y_wj = self.r2y_tw - r2y_tw_j
return delta_r2_y_wj, delta_r2t_wj
[docs] def check_sensitivity(self, plot=True):
"""
Function to perform sensitivity analysis.
:param plot: plot = True generates a plot of lower confidence bound of the estimate for different variations of unobserved confounding.
plot = False overrides the setting
:returns: instance of PartialLinearSensitivityAnalyzer class
"""
# Obtaining theta_s (the obtained estimate)
self.point_estimate = self.estimator.intercept__inference().point_estimate
self.standard_error = self.estimator.intercept__inference().stderr
self.theta_s = self.point_estimate[0]
# Creating numpy arrays
features = self.observed_common_causes.copy()
treatment_df = self.treatment.copy()
X_df = pd.concat([treatment_df, features], axis=1)
W = features.to_numpy()
numeric_features = get_numeric_features(X_df)
X = X_df.to_numpy()
T = treatment_df.to_numpy()
Y = self.outcome.copy()
Y = Y.to_numpy()
# Setting up cross-validation parameters
cv = KFold(n_splits=self.num_splits, shuffle=self.shuffle_data, random_state=self.shuffle_random_seed)
num_samples = X.shape[0]
split_indices = list(cv.split(X))
indices = np.arange(0, num_samples, 1)
# tuple of residuals from first stage estimation [0,1], and the confounders [2]
residuals = self.estimator.residuals_
residualized_outcome = residuals[0] # T-E[T|W]
residualized_treatment = residuals[1] # Y - E[Y|W]
W = residuals[3]
n_residuals = residualized_outcome.shape[0]
indices = np.arange(0, n_residuals, 1)
residualized_outcome = residualized_outcome[indices]
residualized_treatment = residualized_treatment[indices]
# We need to estimate, sigma^2 = (Y-g_s)^2. We use the following derivation.
# Yres = Y - E[Y|W]
# E[Y|W] = f(x) + theta_s * E[T|W]
# Yres = Y - f(x) - theta_s * E[T|W]
# g(s) = theta_s * T + f(x)
# g(s) = theta_s * (T - E[T|W]) + f(x) + theta_s * E[T|W]
# g(s) = theta_s * Tres +f(x) + theta_s * E[T|W]
# Y - g(s) = Y - [theta_s * Tres + f(x) + theta_s * E[T|W] )
# Y - g(s) = ( Y - f(x) - theta_s * E[T|W]) - theta_s * Tres
# Y - g(s) = Yres - theta_s * Tres
residualized_outcome_second_stage = residualized_outcome - self.theta_s * residualized_treatment
self.sigma_2 = np.mean(residualized_outcome_second_stage**2)
# nu_2 is E[alpha_s^2]
self.nu_2 = np.mean(residualized_treatment**2)
self.S2 = self.sigma_2 / self.nu_2
# Now computing scores for finding the (1-a) confidence interval
self.neyman_orthogonal_score_outcome = (
residualized_outcome_second_stage * residualized_outcome_second_stage - self.sigma_2
)
self.neyman_orthogonal_score_treatment = residualized_treatment * residualized_treatment - self.nu_2
self.neyman_orthogonal_score_theta = (residualized_outcome_second_stage) * residualized_treatment / self.nu_2
# R^2 of treatment with observed common causes
reg_function_fit = self.estimator.models_t[0][0] # First Stage treatment model
treatment_model = reg_function_fit.predict(W)
self.r2t_w = np.var(treatment_model) / np.var(T)
# R^2 of outcome with treatment and observed common causes
self.g_s = Y - residualized_outcome_second_stage
self.r2y_tw = np.var(self.g_s) / np.var(Y)
self.g_s_j = np.zeros(num_samples)
if self.benchmarking:
delta_r2_y_wj, delta_r2t_wj = self.compute_r2diff_benchmarking_covariates(
treatment_df,
features,
T,
Y,
W,
self.benchmark_common_causes,
split_indices=split_indices,
second_stage_linear=False,
is_partial_linear=self.is_partial_linear,
)
# Partial R^2 of outcome after regressing over unobserved confounder, observed common causes and treatment
delta_r2y_u = self.frac_strength_outcome * delta_r2_y_wj
# Partial R^2 of treatment after regressing over unobserved confounder and observed common causes
delta_r2t_u = self.frac_strength_treatment * delta_r2t_wj
self.r2yu_tw = delta_r2y_u / (1 - self.r2y_tw)
self.r2tu_w = delta_r2t_u / (1 - self.r2t_w)
if self.r2yu_tw >= 1:
self.r2yu_tw = 1
self.logger.warning(
"Warning: r2yu_tw can not be > 1. Try a lower effect_fraction_on_outcome. Setting r2yu_tw to 1"
)
if self.r2tu_w >= 1:
self.r2tu_w = 1
self.logger.warning(
"Warning: r2tu_w can not be > 1. Try a lower effect_fraction_on_treatment. Setting r2tu_w to 1"
)
if self.r2yu_tw < 0:
self.r2yu_tw = 0
if self.r2tu_w < 0:
self.r2tu_w = 0
else:
self.r2yu_tw = self.effect_strength_outcome
self.r2tu_w = self.effect_strength_treatment
benchmarking_results = self.perform_benchmarking(
r2yu_tw=self.r2yu_tw,
r2tu_w=self.r2tu_w,
significance_level=self.significance_level,
is_partial_linear=self.is_partial_linear,
)
self.results = pd.DataFrame(benchmarking_results, index=[0])
self.RV = self.calculate_robustness_value(alpha=None, is_partial_linear=self.is_partial_linear)
self.RV_alpha = self.calculate_robustness_value(
alpha=self.significance_level, is_partial_linear=self.is_partial_linear
)
if plot == True:
self.plot()
return self
[docs] def plot(
self,
plot_type="lower_confidence_bound",
plot_size=(7, 7),
contours_color="blue",
critical_contour_color="red",
label_fontsize=9,
contour_linewidths=0.75,
contour_linestyles="solid",
contours_label_color="black",
critical_label_color="red",
unadjusted_estimate_marker="D",
unadjusted_estimate_color="black",
adjusted_estimate_marker="^",
adjusted_estimate_color="red",
legend_position=(1.05, 1),
):
"""
Plots and summarizes the sensitivity bounds as a contour plot, as they vary with the partial R^2 of the unobserved confounder(s) with the treatment and the outcome
Two types of plots can be generated, based on adjusted estimates or adjusted t-values
X-axis: Partial R^2 of treatment and unobserved confounder(s)
Y-axis: Partial R^2 of outcome and unobserved confounder(s)
We also plot bounds on the partial R^2 of the unobserved confounders obtained from observed covariates
:param plot_type: possible values are 'bias','lower_ate_bound','upper_ate_bound','lower_confidence_bound','upper_confidence_bound'
:param plot_size: tuple denoting the size of the plot (default = (7,7))
:param contours_color: color of contour line (default = blue)
String or array. If array, lines will be plotted with the specific color in ascending order.
:param critical_contour_color: color of threshold line (default = red)
:param label_fontsize: fontsize for labelling contours (default = 9)
:param contour_linewidths: linewidths for contours (default = 0.75)
:param contour_linestyles: linestyles for contours (default = "solid")
See : https://matplotlib.org/3.5.0/gallery/lines_bars_and_markers/linestyles.html for more examples
:param contours_label_color: color of contour line label (default = black)
:param critical_label_color: color of threshold line label (default = red)
:param unadjusted_estimate_marker: marker type for unadjusted estimate in the plot (default = 'D')
See: https://matplotlib.org/stable/api/markers_api.html
:param unadjusted_estimate_color: marker color for unadjusted estimate in the plot (default = "black")
:param adjusted_estimate_marker: marker type for bias adjusted estimates in the plot (default = '^')
:parm adjusted_estimate_color: marker color for bias adjusted estimates in the plot (default = "red")
:param legend_position:tuple denoting the position of the legend (default = (1.6, 0.6))
"""
critical_value = 0
fig, ax = plt.subplots(1, 1, figsize=plot_size)
ax.set_title("Sensitivity contour plot of %s" % plot_type)
if self.is_partial_linear:
ax.set_xlabel("Partial R^2 of unobserved confounder with treatment")
else:
ax.set_xlabel("Fraction of the variance in Reisz function explained by unobserved confounder")
ax.set_ylabel("Partial R^2 of unobserved confounder with outcome")
if self.effect_strength_treatment is None:
# adding 1.1 as plotting margin ensure that the benchmarked part is shown fully in plot
x_limit = (1.1 * self.r2tu_w) if self.benchmarking else 0.99
r2tu_w = np.arange(0.0, x_limit, x_limit / self.num_points_per_contour)
else:
x_limit = max(self.r2tu_w)
r2tu_w = self.r2tu_w
if self.effect_strength_outcome is None:
# adding 1.1 as plotting margin ensure that the benchmarked part is shown fully in plot
y_limit = (1.1 * self.r2yu_tw) if self.benchmarking else 0.99
r2yu_tw = np.arange(0.0, y_limit, y_limit / self.num_points_per_contour)
else:
y_limit = self.r2yu_tw[-1]
r2yu_tw = self.r2yu_tw
ax.set_xlim(-x_limit / 20, x_limit)
ax.set_ylim(-y_limit / 20, y_limit)
undjusted_estimates = None
contour_values = np.zeros((len(r2yu_tw), len(r2tu_w)))
for i in range(len(r2yu_tw)):
y = r2yu_tw[i]
for j in range(len(r2tu_w)):
x = r2tu_w[j]
benchmarking_results = self.perform_benchmarking(
r2yu_tw=y,
r2tu_w=x,
significance_level=self.significance_level,
is_partial_linear=self.is_partial_linear,
)
contour_values[i][j] = benchmarking_results[plot_type]
contour_plot = ax.contour(
r2tu_w,
r2yu_tw,
contour_values,
colors=contours_color,
linewidths=contour_linewidths,
linestyles=contour_linestyles,
)
ax.clabel(contour_plot, inline=1, fontsize=label_fontsize, colors=contours_label_color)
if critical_value >= contour_values.min() and critical_value <= contour_values.max() and plot_type != "bias":
contour_plot = ax.contour(
r2tu_w,
r2yu_tw,
contour_values,
colors=critical_contour_color,
linewidths=contour_linewidths,
levels=[critical_value],
)
ax.clabel(contour_plot, [critical_value], inline=1, fontsize=label_fontsize, colors=critical_label_color)
# Adding unadjusted point estimate
if (
plot_type == "lower_confidence_bound"
or plot_type == "upper_confidence_bound"
or plot_type == "lower_ate_bound"
or plot_type == "upper_ate_bound"
):
ax.scatter(
[0],
[0],
marker=unadjusted_estimate_marker,
color=unadjusted_estimate_color,
label="Unadjusted({:1.2f})".format(self.theta_s),
)
# Adding bounds to partial R^2 values for given strength of confounders
if self.benchmarking:
if self.frac_strength_treatment == self.frac_strength_outcome:
signs = str(round(self.frac_strength_treatment, 2))
else:
signs = str(round(self.frac_strength_treatment, 2)) + "/" + str(round(self.frac_strength_outcome, 2))
label = signs + " X " + str(self.benchmark_common_causes) + " ({:1.2f}) ".format(self.results[plot_type][0])
ax.scatter(
self.r2tu_w, self.r2yu_tw, color=adjusted_estimate_color, marker=adjusted_estimate_marker, label=label
)
plt.margins()
ax.legend(bbox_to_anchor=legend_position, loc="upper left")
plt.show()
def __str__(self):
s = "Sensitivity Analysis to Unobserved Confounding using partial R^2 parameterization\n\n"
s += "Original Effect Estimate : {0}\n".format(self.theta_s)
s += "Robustness Value : {0}\n\n".format(self.RV)
s += "Robustness Value (alpha={0}) : {1}\n\n".format(self.significance_level, self.RV_alpha)
s += "Interpretation of results :\n"
s += "Any confounder explaining less than {0}% percent of the residual variance of both the treatment and the outcome would not be strong enough to explain away the observed effect i.e bring down the estimate to 0 \n\n".format(
round(self.RV * 100, 2)
)
s += "For a significance level of {0}%, any confounder explaining more than {1}% percent of the residual variance of both the treatment and the outcome would be strong enough to make the estimated effect not 'statistically significant'\n\n".format(
self.significance_level * 100, round(self.RV_alpha * 100, 2)
)
return s