# Source code for pywhy_graphs.algorithms.semi_directed_paths

import networkx as nx

from ..config import EdgeType
from ..typing import Node

# Names exported by ``from <this module> import *``; the underscore-prefixed
# helpers defined below are intentionally left out.
__all__ = [
    "is_semi_directed_path",
    "all_semi_directed_paths",
]


def _empty_generator():
    """Return a generator that is exhausted immediately and yields nothing."""
    return
    # Unreachable on purpose: the mere presence of ``yield`` is what makes
    # this a generator function instead of a function returning ``None``.
    yield


def is_semi_directed_path(G, nodes):
    """Check whether a sequence of nodes is a semi-directed path in `G`.

    A *semi-directed path* is a nonempty sequence of distinct nodes in which
    every pair of consecutive nodes is adjacent in the graph and no edge
    between consecutive nodes carries a directed endpoint (an arrowhead)
    pointing back towards the start of the sequence. For instance,
    ``(a -> b o-> c <-> d -> e)`` is not a semi-directed path from ``a`` to
    ``e``: the endpoint ``d *-> c`` is directed towards ``a``.

    Parameters
    ----------
    G : graph
        A mixed-edge graph.
    nodes : list
        A list of one or more nodes in the graph `G`.

    Returns
    -------
    bool
        ``True`` if `nodes` is a semi-directed path in `G`, else ``False``.

    Notes
    -----
    This function closely mirrors networkx's
    :func:`networkx.algorithms.simple_paths.is_simple_path`.
    """
    n_nodes = len(nodes)

    # An empty sequence is never a path (raising NetworkXPointlessConcept
    # would be the alternative).
    if n_nodes == 0:
        return False

    # A lone node is a path exactly when it belongs to the graph.
    if n_nodes == 1:
        return nodes[0] in G

    # A single node missing from the graph rules the whole sequence out.
    if any(node not in G for node in nodes):
        return False

    # So does any node that shows up more than once.
    if len(set(nodes)) != n_nodes:
        return False

    directed = EdgeType.DIRECTED.value
    bidirected = EdgeType.BIDIRECTED.value

    def _breaks_path(tail, head):
        # A directed or bidirected edge from ``head`` to ``tail`` puts an
        # arrowhead at ``tail``, i.e. pointing back towards the start.
        if G.has_edge(head, tail, directed) or G.has_edge(head, tail, bidirected):
            return True
        # Otherwise the step is only valid if the two nodes are adjacent.
        return not G.has_edge(tail, head)

    return not any(
        _breaks_path(nodes[pos], nodes[pos + 1]) for pos in range(n_nodes - 1)
    )
def all_semi_directed_paths(G, source: Node, target: Node, cutoff: int = None):
    """Generate all semi-directed paths from source to target in G.

    A semi-directed path is a path from ``source`` to ``target`` in which no
    endpoint is directed from ``target`` towards ``source``. I.e.
    ``target *-> source`` does not exist along the path.

    Parameters
    ----------
    G : Graph
        The graph.
    source : Node
        The source node.
    target : Node
        The target node. If `target` is not itself a node of `G` but is
        iterable, its elements are used as the set of target nodes.
    cutoff : integer, optional
        Depth to stop the search. Only paths of length <= cutoff are returned.
        Defaults to ``len(G) - 1``.

    Returns
    -------
    generator of lists
        A generator producing each semi-directed path as a list of nodes
        that starts at `source` and ends at a target. The generator is empty
        when `source` is one of the targets or when `cutoff` is less than 1.

    Raises
    ------
    networkx.NodeNotFound
        If `source` is not in `G`, or if `target` is neither a node of `G`
        nor an iterable of nodes.

    Notes
    -----
    This algorithm is very similar to networkx's
    :func:`networkx.algorithms.simple_paths.all_simple_paths` function.

    This algorithm uses a modified depth-first search to generate the
    paths [1]_. A single path can be found in $O(V+E)$ time but the
    number of semi-directed paths in a graph can be very large, e.g. $O(n!)$
    in the complete graph of order $n$.

    This function does not check that a path exists between `source` and
    `target`. For large graphs, this may result in very long runtimes.
    Consider using `has_path` to check that a path exists between `source`
    and `target` before calling this function on large graphs.

    References
    ----------
    .. [1] R. Sedgewick, "Algorithms in C, Part 5: Graph Algorithms",
       Addison Wesley Professional, 3rd ed., 2001.
    """
    if source not in G:
        # An f-string is used instead of ``"%s" % source``: the latter raises
        # a TypeError when the node is a tuple, hiding the real error.
        raise nx.NodeNotFound(f"source node {source} not in graph")

    if target in G:
        targets = {target}
    else:
        # Not a single node of G: interpret it as a collection of targets.
        try:
            targets = set(target)  # type: ignore
        except TypeError as err:
            raise nx.NodeNotFound(f"target node {target} not in graph") from err

    # The trivial path from a node to itself is not reported.
    if source in targets:
        return _empty_generator()

    # A simple path can visit each node at most once, so it has at most
    # ``len(G) - 1`` edges.
    if cutoff is None:
        cutoff = len(G) - 1
    if cutoff < 1:
        return _empty_generator()

    return _all_semi_directed_paths_graph(G, source, targets, cutoff)
def _all_semi_directed_paths_graph(
    G, source, targets, cutoff, directed_edge_name="directed", bidirected_edge_name="bidirected"
):
    """Depth-first search for all semi-directed paths from source to targets.

    See networkx's ``all_simple_paths`` function, on which this search is
    modelled. The difference is that a step from ``prev_node`` to ``nbr`` is
    only taken when there is no directed or bidirected edge from ``nbr`` to
    ``prev_node``, i.e. no arrowhead pointing back towards ``source``.

    Parameters
    ----------
    G : graph
        A graph exposing ``neighbors(node)`` and
        ``has_edge(u, v, edge_type)``.
    source : node
        Node at which every path starts.
    targets : set
        Nodes at which a path may end.
    cutoff : int
        Maximum number of edges in a returned path.
    directed_edge_name : str
        Edge-type name passed to ``G.has_edge`` for directed edges.
    bidirected_edge_name : str
        Edge-type name passed to ``G.has_edge`` for bidirected edges.

    Yields
    ------
    list
        The nodes of one semi-directed path, from ``source`` to a target.
    """

    def _points_back(nbr, prev_node):
        # True when the edge between the two nodes has an arrowhead at
        # ``prev_node``, i.e. directed towards the start of the path.
        return G.has_edge(nbr, prev_node, directed_edge_name) or G.has_edge(
            nbr, prev_node, bidirected_edge_name
        )

    # Insertion-ordered record of the nodes on the current path.
    visited = {source: True}

    # One neighbor iterator per node on the current path.
    stack = [iter(G.neighbors(source))]

    # prev_nodes[-1] is the node whose neighbors stack[-1] iterates over.
    prev_nodes = [source]

    while stack:
        # get the iterator through nbrs for the current node
        nbrs = stack[-1]
        prev_node = prev_nodes[-1]
        nbr = next(nbrs, None)

        if nbr is None:
            # once all neighbors are visited, pop the stack and remove the
            # current node from the path
            stack.pop()
            visited.popitem()
            prev_nodes.pop()
        elif nbr not in visited and _points_back(nbr, prev_node):
            # Stepping to ``nbr`` would traverse an arrowhead pointing back
            # towards the source, so this neighbor cannot extend the path.
            continue
        elif len(visited) < cutoff:
            if nbr in visited:
                continue
            if nbr in targets:
                # we've found a path to a target
                yield list(visited) + [nbr]
            visited[nbr] = True
            if targets - set(visited):
                # expand stack until all targets are found
                stack.append(iter(G.neighbors(nbr)))
                prev_nodes.append(nbr)
            else:
                visited.popitem()  # maybe other ways to nbr
        else:
            # len(visited) == cutoff: the path can grow by one last edge, so
            # only targets adjacent to the current node are of interest.
            candidates = (targets & (set(nbrs) | {nbr})) - set(visited)
            for target in candidates:
                # The neighbors drained from the iterator above have not
                # been screened yet; apply the same endpoint rule to them.
                if not _points_back(target, prev_node):
                    yield list(visited) + [target]
            stack.pop()
            visited.popitem()
            prev_nodes.pop()