Source code for dowhy.utils.dgp
import numpy as np
import pandas as pd
class DataGeneratingProcess:
    """Base class for implementation of data generating process.

    Subclasses implement functions that create various data generating
    processes. All data generating processes are in the package
    "dowhy.utils.dgps".
    """

    # Fallback for ``percentile`` when the treatment is binary and the caller
    # did not supply one. Expressed as a fraction in [0, 1].
    DEFAULT_PERCENTILE = 0.9

    def __init__(self, **kwargs):
        """Store the configuration of the data generating process.

        Required keyword arguments (a missing one raises ``KeyError``):
            treatment, outcome, confounder, effect_modifier

        Optional keyword arguments:
            weights: defaults to ``{}``.
            bias: defaults to ``{}``.
            seed: when not ``None``, NumPy's global random state is seeded
                with it via ``np.random.seed``. Defaults to ``None``.
            treatment_is_binary: defaults to ``False``.
            percentile: fraction in [0, 1] used by ``convert_to_binary``.
                Only accepted when ``treatment_is_binary`` is true, in which
                case it defaults to ``DEFAULT_PERCENTILE``.

        Raises:
            ValueError: if ``percentile`` is given (and not ``None``) while
                ``treatment_is_binary`` is false.
        """
        self.treatment = kwargs["treatment"]
        self.outcome = kwargs["outcome"]
        self.confounder = kwargs["confounder"]
        self.effect_modifier = kwargs["effect_modifier"]
        self.weights = kwargs.pop("weights", {})
        self.bias = kwargs.pop("bias", {})
        self.seed = kwargs.pop("seed", None)
        self.treatment_is_binary = kwargs.pop("treatment_is_binary", False)

        if self.treatment_is_binary:
            self.percentile = kwargs.pop("percentile", DataGeneratingProcess.DEFAULT_PERCENTILE)
        elif kwargs.pop("percentile", None) is not None:
            raise ValueError("Cannot use percentile, if the input is non-binary")
        else:
            # Placeholder shown by __str__ for non-binary treatments.
            self.percentile = "NA"

        self.true_value = None

        if self.seed is not None:
            np.random.seed(self.seed)

    def generate_data(self):
        """Not implemented in the base class; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def generation_process(self):
        """Not implemented in the base class; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def convert_to_binary(self, data, deterministic=False):
        """Turn continuous values into a 0/1 indicator returned as floats.

        Args:
            data: NumPy array. The stochastic branch only reads column 0, so
                a 2-D array of shape ``(n, 1)`` is assumed -- TODO confirm
                against callers.
            deterministic: if true, threshold ``data`` at the
                ``self.percentile`` quantile; otherwise draw the indicator at
                random with a probability that grows with the rank of each row.

        Returns:
            Float array of zeros and ones. The deterministic branch keeps the
            shape of ``data``; the stochastic branch returns shape ``(n, 1)``.
        """
        if deterministic:
            # self.percentile is a fraction in [0, 1] (see DEFAULT_PERCENTILE
            # and the probabilities below), so np.quantile is the right call;
            # np.percentile would read 0.9 as "0.9 %" rather than "90 %".
            threshold = np.quantile(data, self.percentile, axis=0)
            # NOTE(review): this marks values at or below the threshold as 1
            # (share of ones ~ percentile), whereas the stochastic branch
            # yields a share of ones ~ 1 - percentile -- confirm the intended
            # direction.
            binary_treat_value = data <= threshold
        else:
            order = data.argsort(axis=0)
            ranks = np.empty_like(order)
            # Invert the sort permutation of column 0 to get each row's rank.
            ranks[order[:, 0], 0] = np.arange(data.shape[0])
            prob_t = ranks / data.shape[0]

            # Generating data with equal 0 and 1 (since ranks are uniformly distributed)
            binary_treat_value = np.random.binomial(1, prob_t[:, 0], data.shape[0])

            # Flipping some values so that the expected share of ones moves
            # from about 0.5 to about 1 - percentile.
            if self.percentile >= 0.5:
                ones = binary_treat_value == 1
                # Keep each 1 with probability 2 * (1 - percentile).
                keep = np.random.binomial(1, (1 - self.percentile) * 2, ones.sum())
                binary_treat_value[ones] = keep * binary_treat_value[ones]
            else:
                zeros = binary_treat_value == 0
                # Turn each 0 into a 1 with probability 1 - 2 * percentile.
                flip = np.random.binomial(1, 1 - self.percentile * 2, zeros.sum())
                binary_treat_value[zeros] = flip + binary_treat_value[zeros]

            binary_treat_value = binary_treat_value[:, np.newaxis]

        return binary_treat_value.astype(float)

    def __str__(self):
        """Return a multi-line summary of the stored configuration."""
        template = (
            "\n"
            "treatment:{}\n"
            "outcome:{}\n"
            "confounder: {}\n"
            "effect_modifier: {}\n"
            "weights: {}\n"
            "bias: {}\n"
            "seed: {}\n"
            "treatment_is_binary: {}\n"
            "percentile: {}\n"
        )
        return template.format(
            self.treatment,
            self.outcome,
            self.confounder,
            self.effect_modifier,
            self.weights,
            self.bias,
            self.seed,
            self.treatment_is_binary,
            self.percentile,
        )