Source code for dowhy.interpreters.confounder_distribution_interpreter

import numpy as np
import pandas as pd

from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimators.propensity_score_weighting_estimator import PropensityScoreWeightingEstimator
from dowhy.interpreters.visual_interpreter import VisualInterpreter


[docs]class ConfounderDistributionInterpreter(VisualInterpreter): SUPPORTED_ESTIMATORS = [ PropensityScoreWeightingEstimator, ] def __init__(self, estimate, fig_size, font_size, var_name, var_type, **kwargs): """ :param estimate: Causal estimate :param fig_size: Size of the figure :param font_size: Size of the font of the plot title :param var_name: The confounding variable for which distribution changes should be compared :param var_type: Type of the confounding variable; must be one of 'continuous' or 'discrete' """ super().__init__(estimate, **kwargs) if not isinstance(estimate, CausalEstimate): error_msg = "The interpreter method expects a CausalEstimate object." self.logger.error(error_msg) raise ValueError(error_msg) self.estimator = self.estimate.estimator if not any( isinstance(self.estimator, est_class) for est_class in ConfounderDistributionInterpreter.SUPPORTED_ESTIMATORS ): error_msg = "The interpreter method only supports propensity score weighting estimator." self.logger.error(error_msg) raise ValueError(error_msg) if var_type not in {"continuous", "discrete"}: error_msg = "var_type must be one of 'continuous' or 'discrete'." self.logger.error(error_msg) raise ValueError(error_msg) if var_type == "continuous": error_msg = "Distributional changes plot for continuous variables is not yet implemented." self.logger.error(error_msg) raise ValueError(error_msg) self.fig_size = fig_size self.font_size = font_size self.var_name = var_name
[docs] @staticmethod def discrete_dist_plot(labels, not_treated_counts, treated_counts, ax, title, var_name, font_size, width=0.35): """ Plot of the treated vs untreated. """ ax.bar(labels - width / 2, not_treated_counts, width, label="Untreated") ax.bar(labels + width / 2, treated_counts, width, label="Treated") ax.set_xlabel(var_name) ax.set_ylabel("Count") ax.set_title(title, fontsize=font_size) ax.set_xticks(labels) ax.set_xticklabels(labels) ax.legend()
[docs] def interpret(self, data: pd.DataFrame): """ Shows distribution changes for confounding variables before and after applying inverse propensity weights. """ cols = self.estimator._observed_common_causes_names + self.estimate._treatment_name df = data[cols].copy() treated = self.estimate._treatment_name[0] propensity = self.estimate.propensity_scores # add weight column df.loc[:, "weight"] = df.loc[:, treated] * (propensity) ** (-1) + (1 - df.loc[:, treated]) * ( 1 - propensity ) ** (-1) # before weights are applied we count number rows in each category # which is equivalent to summing over weight=1 barplot_df_before = df.groupby([self.var_name, treated]).size().reset_index(name="count") # after weights are applied we need to sum over the given weights barplot_df_after = df.groupby([self.var_name, treated]).agg({"weight": np.sum}).reset_index() barplot_df_after.rename(columns={"weight": "count"}, inplace=True) title1 = "Distribution of " + self.var_name + " before applying the weights" title2 = "Distribution of " + self.var_name + " after applying the weights" import matplotlib.pyplot as plt fig, (ax1, ax2) = plt.subplots(1, 2, figsize=self.fig_size) iterable = zip([barplot_df_before, barplot_df_after], [ax1, ax2], [title1, title2]) for plot_df, ax, title in iterable: aggregated_not_treated = plot_df[plot_df[treated] == False].reset_index() aggregated_treated = plot_df[plot_df[treated] == True].reset_index() labels = aggregated_not_treated[self.var_name].astype("float") not_treated_counts = aggregated_not_treated["count"] treated_counts = aggregated_treated["count"] self.discrete_dist_plot( labels, not_treated_counts, treated_counts, ax, title, self.var_name, self.font_size ) fig.tight_layout() plt.show()