# Source code for networkx.algorithms.connectivity.kcomponents

"""
Moody and White algorithm for k-components
"""

from collections import defaultdict
from itertools import combinations
from operator import itemgetter

import networkx as nx

# Define the default maximum flow function.
from networkx.algorithms.flow import edmonds_karp
from networkx.utils import not_implemented_for

# Flow function that k_components falls back to when called with
# ``flow_func=None``.
default_flow_func = edmonds_karp

# Names exported by ``from <module> import *``.
__all__ = ["k_components"]



# [docs]
@not_implemented_for("directed")
@nx._dispatchable
def k_components(G, flow_func=None):
    r"""Compute the hierarchy of k-components of the graph G.

    A `k`-component is a maximal subgraph of G whose node connectivity is
    at least `k`, that is, at least `k` nodes have to be removed to split
    it into more components. Because connectivity levels are nested,
    `k`-components form a hierarchy: a connected graph can contain several
    2-components, each of them can contain one or more 3-components, and
    so on.

    Parameters
    ----------
    G : NetworkX graph

    flow_func : function
        Function used for the underlying flow computations. When it is
        None, the module default :meth:`edmonds_karp` is used. That
        function performs better in sparse graphs with right tailed degree
        distributions, while :meth:`shortest_augmenting_path` will perform
        better in denser graphs.

    Returns
    -------
    k_components : dict
        Dictionary keyed by every connectivity level `k` found in the
        input graph. Each value is a list of sets of nodes, one set per
        k-component of that level.

    Raises
    ------
    NetworkXNotImplemented
        If the input graph is directed.

    Examples
    --------
    >>> # Petersen graph has 10 nodes and it is triconnected, thus all
    >>> # nodes are in a single component on all three connectivity levels
    >>> G = nx.petersen_graph()
    >>> k_components = nx.k_components(G)
    >>> sorted(k_components)
    [1, 2, 3]
    >>> all(comps == [set(G)] for comps in k_components.values())
    True

    Notes
    -----
    The algorithm is the one given by Moody and White [1]_ (appendix A).
    It relies on Kanevsky's algorithm [2]_ for enumerating all
    minimum-size node cut-sets of a graph (implemented in the
    :meth:`all_node_cuts` function):

        1. Compute node connectivity, k, of the input graph G.

        2. Identify all k-cutsets at the current level of connectivity using
           Kanevsky's algorithm.

        3. Generate new graph components based on the removal of
           these cutsets. Nodes in a cutset belong to both sides
           of the induced cut.

        4. If the graph is neither complete nor trivial, return to 1;
           else end.

    Some heuristics (see [3]_ for details) are also applied to speed up
    the computation.

    See Also
    --------
    node_connectivity
    all_node_cuts
    biconnected_components : special case of this function when k=2
    k_edge_components : similar to this function, but uses edge-connectivity
        instead of node-connectivity

    References
    ----------
    .. [1]  Moody, J. and D. White (2003). Social cohesion and embeddedness:
            A hierarchical conception of social groups.
            American Sociological Review 68(1), 103--28.
            http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf

    .. [2]  Kanevsky, A. (1993). Finding all minimum-size separating vertex
            sets in a graph. Networks 23(6), 533--541.
            http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract

    .. [3]  Torrents, J. and F. Ferraro (2015). Structural Cohesion:
            Visualization and Heuristics for Fast Computation.
            https://arxiv.org/pdf/1503.04476v1

    """
    if flow_func is None:
        flow_func = default_flow_func

    # Maps a connectivity level to the candidate node sets detected at that
    # level. Candidates may overlap (k-components share at most k - 1 nodes)
    # and are cleaned up by _reconstruct_k_components at the end.
    found = defaultdict(list)

    # Level 1: connected components. Isolated nodes have connectivity 0 and
    # are therefore left out.
    for cc in nx.connected_components(G):
        members = set(cc)
        if len(members) > 1:
            found[1].append(members)

    # Level 2 and above: every biconnected component is the root of a
    # depth-first exploration of increasingly cohesive subgraphs.
    for block in [G.subgraph(c) for c in nx.biconnected_components(G)]:
        # Dyads are not treated as bicomponents.
        if len(block) <= 2:
            continue
        found[2].append(set(block))

        root_k = nx.node_connectivity(block, flow_func=flow_func)
        if root_k > 2:
            found[root_k].append(set(block))
        root_cuts = list(nx.all_node_cuts(block, k=root_k, flow_func=flow_func))

        # Each frame pairs the connectivity of a subgraph with the lazy
        # iterator over the parts obtained by cutting that subgraph.
        frames = [(root_k, _generate_partition(block, root_cuts, root_k))]
        while frames:
            outer_k, parts = frames[-1]
            try:
                part = next(parts)
                sub = block.subgraph(part)
                sub_k = nx.node_connectivity(sub, flow_func=flow_func)
                # Only record a part that is more cohesive than the subgraph
                # it was cut from; levels 1 and 2 were recorded above.
                if sub_k > max(outer_k, 2):
                    found[sub_k].append(set(sub))
                sub_cuts = list(nx.all_node_cuts(sub, k=sub_k, flow_func=flow_func))
                if sub_cuts:
                    frames.append((sub_k, _generate_partition(sub, sub_cuts, sub_k)))
            except StopIteration:
                # Every part of the subgraph on top of the stack was visited.
                frames.pop()

    # Components may have been recorded only at their maximum level, and
    # subsets of an already detected k-component may show up at level k.
    # Testing for that inside the loop above would penalize the common case,
    # so the gaps are filled and the overlaps consolidated in a final pass
    # that returns a dictionary without skipped connectivity levels.
    return _reconstruct_k_components(found)



def _consolidate(sets, k):
    """Yield the unions of groups of sets linked by overlaps of size >= k.

    Two input sets are linked when they have at least `k` elements in
    common. Linked sets are grouped transitively and every group is
    yielded as the union of its members. A set that is not linked to any
    other one is yielded as a copy of itself.

    See: http://rosettacode.org/wiki/Set_consolidation

    The iterative python implementation posted there is faster than this
    one, which pays the overhead of building a Graph and calling
    nx.connected_components, but it is not clear whether it can be used
    in NetworkX because that code carries no licence.

    """
    indexed = dict(enumerate(sets))
    # Auxiliary graph: one node per input set, one edge per pair of sets
    # whose intersection has at least k elements.
    overlap = nx.Graph()
    overlap.add_nodes_from(indexed)
    for (i, first), (j, second) in combinations(indexed.items(), 2):
        if len(first & second) >= k:
            overlap.add_edge(i, j)
    for group in nx.connected_components(overlap):
        members = [indexed[i] for i in group]
        yield set.union(*members)


def _generate_partition(G, cuts, k):
    """Yield the parts of G induced by its minimum-size node cutsets.

    This follows step 3 of Moody and White (appendix A). Cutsets are
    applied one at a time: a cutset whose removal splits the current set
    of nodes into two or more connected components produces one candidate
    part per component, made of that component plus the cutset nodes
    adjacent to it. Candidates are split again until no cutset separates
    them. Every j-component of ``G`` with ``j > k`` survives intact in at
    least one generated part, because removing ``k < j`` nodes cannot
    disconnect it.

    Cutsets must be applied one at a time for correctness: removing the
    union of all cutsets in a single pass can drop cutset nodes whose
    neighbors all belong to other cutsets, silently losing k-components.
    """
    total = G.order()
    cutsets = [set(cut) for cut in cuts]
    leaves = []  # parts that no cutset splits any further
    visited = set()  # frozensets of the candidates already scheduled
    pending = [set(G)]
    while pending:
        current = pending.pop()

        # Look for the first cutset, in input order, that is strictly
        # contained in the current part and actually disconnects it.
        split = None
        for cutset in cutsets:
            if cutset < current:
                pieces = list(nx.connected_components(G.subgraph(current - cutset)))
                if len(pieces) > 1:
                    split = (cutset, pieces)
                    break

        if split is None:
            # The whole graph is not a proper part of itself.
            if len(current) < total:
                leaves.append(current)
            continue

        cutset, pieces = split
        for piece in pieces:
            # Cutset nodes belong to both sides of the induced cut, but only
            # when they have a neighbor in the piece: a cutset node from a
            # j-component (j > k) always has at least two neighbors in it,
            # while attaching a neighborless node would disconnect the part.
            attached = {n for n in cutset if any(v in piece for v in G[n])}
            child = frozenset(piece | attached)
            if child in visited:
                continue
            visited.add(child)
            pending.append(set(child))

    # Merging parts that share at least k + 1 nodes reduces the number of
    # subproblems examined by the loop in k_components. It is a performance
    # heuristic and is not needed for correctness: no j-component with
    # j > k can span two parts. When merging rebuilds the whole graph the
    # raw parts are yielded instead, so that the caller always recurses on
    # strictly smaller graphs.
    merged = list(_consolidate(leaves, k + 1))
    rebuilt_whole_graph = any(len(part) == total for part in merged)
    yield from (leaves if rebuilt_whole_graph else merged)


def _reconstruct_k_components(k_comps):
    """Build the final k-component dictionary, one entry per level.

    `k_comps` maps connectivity levels to lists of candidate node sets.
    Levels are processed from the highest one down to 1, so that every
    level can inherit the components of the level right above it. The
    returned dict has a key for each level from 1 up to the highest level
    in `k_comps`, and is empty when `k_comps` is empty.
    """
    result = {}
    top = max(k_comps) if k_comps else 0
    for level in range(top, 0, -1):
        if level != top and level not in k_comps:
            # Nothing was recorded at this level: it is entirely inherited
            # from the level above.
            candidates = result[level + 1]
        else:
            candidates = k_comps[level]
            if level != top:
                # Propagate a component of the level above unless a single
                # candidate recorded at this level already contains it.
                # Comparing against the union of all candidates is not
                # enough: a component of the level above can span several
                # candidates without being a subset of any of them, and
                # skipping it would replace a genuine k-component by its
                # fragments.
                inherited = [
                    upper
                    for upper in result[level + 1]
                    if all(not upper <= known for known in candidates)
                ]
                if inherited:
                    candidates = candidates + inherited
        merged = list(_consolidate(candidates, level))
        # A single _consolidate pass merges on pairwise overlaps of its
        # input sets, so its output can still contain a set nested inside a
        # merged one. A nested set is never a maximal k-connected subgraph,
        # so it is dropped.
        result[level] = [
            comp for comp in merged if all(not comp < other for other in merged)
        ]
    return result


def build_k_number_dict(kcomps):
    """Map every node to the highest level at which it appears in `kcomps`.

    `kcomps` maps connectivity levels to iterables of node collections.
    Levels are visited in increasing order, so the value stored for a
    node is overwritten by each higher level that contains it.
    """
    k_number = {}
    for level, components in sorted(kcomps.items(), key=itemgetter(0)):
        for component in components:
            for node in component:
                k_number[node] = level
    return k_number