Source code for pywhy_graphs.functional.linear

from typing import Callable, List, Optional, Set

import networkx as nx
import numpy as np

from pywhy_graphs.typing import Node

from .additive import generate_edge_functions_for_node
from .utils import _preprocess_parameter_inputs


[docs] def make_graph_linear_gaussian( G: nx.DiGraph, node_mean_lims: Optional[List[float]] = None, node_std_lims: Optional[List[float]] = None, edge_functions: Optional[List[Callable[[float], float]]] = None, edge_weight_lims: Optional[List[float]] = None, random_state=None, ) -> nx.DiGraph: r"""Convert an existing DAG to a linear Gaussian graphical model. All nodes are sampled from a normal distribution with parametrizations defined uniformly at random between the limits set by the input parameters. The edges apply then a weight and a function based on the inputs in an additive fashion. For node :math:`X_i`, we have: .. math:: X_i = \\sum_{j \in parents} w_j f_j(X_j) + \\epsilon_i where: - :math:`\\epsilon_i \sim N(\mu_i, \sigma_i)`, where :math:`\mu_i` is sampled uniformly at random from `node_mean_lims` and :math:`\sigma_i` is sampled uniformly at random from `node_std_lims`. - :math:`w_j \sim U(\\text{edge_weight_lims})` - :math:`f_j` is a function sampled uniformly at random from `edge_functions` Parameters ---------- G : NetworkX DiGraph The graph to sample data from. The graph will be modified in-place to get the weights and functions of the edges. node_mean_lims : Optional[List[float]], optional The lower and upper bounds of the mean of the Gaussian random variable, by default None, which defaults to a mean of 0. node_std_lims : Optional[List[float]], optional The lower and upper bounds of the std of the Gaussian random variable, by default None, which defaults to a std of 1. edge_functions : List[Callable[float]], optional The set of edge functions that take in an iid sample from the parent and computes a transformation (possibly nonlinear), such as ``(lambda x: x**2, lambda x: x)``, by default None, which defaults to the identity function ``lambda x: x``. edge_weight_lims : Optional[List[float]], optional The lower and upper bounds of the edge weight, by default None, which defaults to a weight of 1. random_state : int, optional Random seed, by default None. Returns ------- G : NetworkX DiGraph NetworkX graph with the edge weights and functions set with node attributes set with ``'parent_function'``, and ``'gaussian_noise_function'``. Moreover the graph attribute ``'linear_gaussian'`` is set to ``True``. """ G = G.copy() if hasattr(G, "get_graphs"): directed_G = G.get_graphs("directed") else: directed_G = G if not nx.is_directed_acyclic_graph(directed_G): raise ValueError("The input graph must be a DAG.") # preprocess hyperparameters and check for validity ( node_mean_lims_, node_std_lims_, edge_functions_, edge_weight_lims_, ) = _preprocess_parameter_inputs( node_mean_lims, node_std_lims, edge_functions, edge_weight_lims ) # Create list of topologically sorted nodes top_sort_idx = list(nx.topological_sort(directed_G)) # sample noise and edge functions for each node and its parents for node in top_sort_idx: # sample noise G = generate_noise_for_node( G, node, node_mean_lims_, node_std_lims_, random_state=random_state ) # sample edge functions and weights generate_edge_functions_for_node( G, node=node, edge_weight_lims=edge_weight_lims_, edge_functions=edge_functions_, random_state=random_state, ) G.graph["functional"] = "linear_gaussian" return G
def generate_noise_for_node(G, node, node_mean_lims, node_std_lims, random_state=None): rng = np.random.default_rng(random_state) # sample noise mean = rng.uniform(low=node_mean_lims[0], high=node_mean_lims[1]) std = rng.uniform(low=node_std_lims[0], high=node_std_lims[1]) G.nodes[node]["exogenous_distribution"] = lambda: rng.normal(**{"loc": mean, "scale": std}) # default is the uniform choice function G.nodes[node]["exogenous_function"] = lambda x: x return G
[docs] def apply_linear_soft_intervention( G, targets: Set[Node], type: str = "additive", random_state=None ): """Applies a soft intervention to a linear Gaussian graph. Parameters ---------- G : Graph Linear functional causal graph. targets : Set[Node] The set of nodes to intervene on simultanenously. type : str, optional Type of intervention, by default "additive". random_state : RandomState, optional Random seed, by default None. Returns ------- G : Graph The functional linear causal graph with the intervention applied on the target nodes. The perturbation occurs on the ``gaussian_noise_function`` of the target nodes. That is, the soft intervention, perturbs the exogenous noise of the target nodes. """ if not G.graph.get("linear_gaussian", True): raise ValueError("The input graph must be a linear Gaussian graph.") if not all(target in G.nodes for target in targets): raise ValueError(f"All targets {targets} must be in the graph: {G.nodes}.") rng = np.random.default_rng(random_state) for target in targets: if type == "additive": G.nodes[target]["gaussian_noise_function"]["mean"] += rng.uniform(low=-1, high=1) return G