Source code for networkx.algorithms.community.bipartitions

"""Functions for splitting a network into two communities (finding a bipartition)."""

import random
from copy import deepcopy
from itertools import count

import networkx as nx

__all__ = [
    "kernighan_lin_bisection",
    "spectral_modularity_bipartition",
    "greedy_node_swap_bipartition",
]


def _kernighan_lin_sweep(edge_info, side):
    """
    This is a modified form of Kernighan-Lin, which moves single nodes at a
    time, alternating between sides to keep the bisection balanced.  We keep
    two min-heaps of swap costs to make optimal-next-move selection fast.
    """
    heap0, heap1 = cost_heaps = nx.utils.BinaryHeap(), nx.utils.BinaryHeap()
    # we use heap methods insert, pop, and get
    for u, nbrs in edge_info.items():
        cost_u = sum(wt if side[v] else -wt for v, wt in nbrs.items())
        if side[u]:
            heap1.insert(u, cost_u)
        else:
            heap0.insert(u, -cost_u)

    def _update_heap_values(node):
        side_node = side[node]
        for nbr, wt in edge_info[node].items():
            side_nbr = side[nbr]
            if side_nbr == side_node:
                wt = -wt
            heap_nbr = cost_heaps[side_nbr]
            if nbr in heap_nbr:
                cost_nbr = heap_nbr.get(nbr) + 2 * wt
                # allow_increase lets us update a value already on the heap
                heap_nbr.insert(nbr, cost_nbr, allow_increase=True)

    i = 0
    totcost = 0
    while heap0 and heap1:
        u, cost_u = heap0.pop()
        _update_heap_values(u)
        v, cost_v = heap1.pop()
        _update_heap_values(v)
        totcost += cost_u + cost_v
        i += 1
        yield totcost, i, (u, v)



[docs]
@nx.utils.not_implemented_for("directed")
@nx.utils.py_random_state(4)
@nx._dispatchable(edge_attrs="weight")
def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
    """Partition a graph into two blocks using the Kernighan–Lin algorithm.

    This algorithm partitions a network into two sets by iteratively
    swapping pairs of nodes to reduce the edge cut between the two sets.  The
    pairs are chosen according to a modified form of Kernighan-Lin [1]_, which
    moves nodes individually, alternating between sides to keep the bisection
    balanced.

    Kernighan-Lin is an approximate algorithm for maximal modularity bisection.
    In [2]_ they suggest that fine-tuned improvements can be made using
    greedy node swapping, (see `greedy_node_swap_bipartition`).
    The improvements are typically only a few percent of the modularity value.
    But they claim that can make a difference between a good and excellent method.
    This function does not perform any improvements. But you can do that yourself.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be undirected.

    partition : tuple
        Pair of iterables containing an initial partition. If not
        specified, a random balanced partition is used.

    max_iter : int
        Maximum number of times to attempt swaps to find an
        improvement before giving up.

    weight : string or function (default: "weight")
        If this is a string, then edge weights will be accessed via the
        edge attribute with this key (that is, the weight of the edge
        joining `u` to `v` will be ``G.edges[u, v][weight]``). If no
        such edge attribute exists, the weight of the edge is assumed to
        be one.

        If this is a function, the weight of an edge is the value
        returned by the function. The function must accept exactly three
        positional arguments: the two endpoints of an edge and the
        dictionary of edge attributes for that edge. The function must
        return a number or None to indicate a hidden edge.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Only used if partition is None

    Returns
    -------
    partition : tuple
        A pair of sets of nodes representing the bipartition.

    Raises
    ------
    NetworkXError
        If `partition` is not a valid partition of the nodes of the graph.

    References
    ----------
    .. [1] Kernighan, B. W.; Lin, Shen (1970).
       "An efficient heuristic procedure for partitioning graphs."
       *Bell Systems Technical Journal* 49: 291--307.
       Oxford University Press 2011.
    .. [2] M. E. J. Newman,
       "Modularity and community structure in networks",
       PNAS, 103 (23), p. 8577-8582,
       https://doi.org/10.1073/pnas.0601602103

    """
    nodes = list(G)

    if partition is None:
        seed.shuffle(nodes)
        mid = len(nodes) // 2
        A, B = nodes[:mid], nodes[mid:]
    else:
        try:
            A, B = partition
        except (TypeError, ValueError) as err:
            raise nx.NetworkXError("partition must be two sets") from err
        if not nx.community.is_partition(G, [A, B]):
            raise nx.NetworkXError("partition invalid")

    side = {node: (node in A) for node in nodes}

    # ruff: noqa: E731   skips check for no lambda functions
    # Using shortest_paths _weight_function with sum instead of min on multiedges
    if callable(weight):
        sum_weight = weight
    elif G.is_multigraph():
        sum_weight = lambda u, v, d: sum(dd.get(weight, 1) for dd in d.values())
    else:
        sum_weight = lambda u, v, d: d.get(weight, 1)

    edge_info = {
        u: {v: wt for v, d in nbrs.items() if (wt := sum_weight(u, v, d)) is not None}
        for u, nbrs in G._adj.items()
    }

    for i in range(max_iter):
        costs = list(_kernighan_lin_sweep(edge_info, side))
        # find out how many edges to update: min_i
        min_cost, min_i, _ = min(costs)
        if min_cost >= 0:
            break

        for _, _, (u, v) in costs[:min_i]:
            side[u] = 1
            side[v] = 0

    part1 = {u for u, s in side.items() if s == 0}
    part2 = {u for u, s in side.items() if s == 1}
    return part1, part2




[docs]
@nx.utils.not_implemented_for("directed")
@nx.utils.not_implemented_for("multigraph")
def spectral_modularity_bipartition(G):
    r"""Return a bipartition of the nodes based on the spectrum of the
    modularity matrix of the graph.

    This method calculates the eigenvector associated with the second
    largest eigenvalue of the modularity matrix, where the modularity
    matrix *B* is defined by

    ..math::

        B_{i j} = A_{i j} - \frac{k_i k_j}{2 m},

    where *A* is the adjacency matrix, `k_i` is the degree of node *i*,
    and *m* is the number of edges in the graph. Nodes whose
    corresponding values in the eigenvector are negative are placed in
    one block, nodes whose values are nonnegative are placed in another
    block.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    C : tuple
        Pair of communities as two sets of nodes of ``G``, partitioned
        according to second largest eigenvalue of the modularity matrix.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> MrHi, Officer = nx.community.spectral_modularity_bipartition(G)
    >>> MrHi, Officer = sorted([sorted(MrHi), sorted(Officer)])
    >>> MrHi
    [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21]
    >>> Officer
    [8, 9, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    References
    ----------
    .. [1] M. E. J. Newman *Networks: An Introduction*, pages 373--378
       Oxford University Press 2011.
    .. [2] M. E. J. Newman,
       "Modularity and community structure in networks",
       PNAS, 103 (23), p. 8577-8582,
       https://doi.org/10.1073/pnas.0601602103

    """
    import numpy as np

    B = nx.linalg.modularity_matrix(G)
    eigenvalues, eigenvectors = np.linalg.eig(B)
    index = np.argsort(eigenvalues)[-1]
    v2 = zip(np.real(eigenvectors[:, index]), G)
    left, right = set(), set()
    for u, n in v2:
        if u < 0:
            left.add(n)
        else:
            right.add(n)
    return left, right




[docs]
@nx.utils.not_implemented_for("multigraph")
def greedy_node_swap_bipartition(G, *, init_split=None, max_iter=10):
    """Split the nodes into two communities based on greedy
    modularity maximization.

    The algorithm works by selecting a node to change communities which
    will maximize the modularity. The swap is made and the community
    structure with the highest modularity is kept.

    Parameters
    ----------
    G : NetworkX graph

    init_split : 2-tuple of sets of nodes
        Pair of sets of nodes in ``G`` providing an initial bipartition
        for the algorithm. If not specified, a random balanced partition
        is used. If this pair of sets is not a partition of the nodes of `G`,
        :exc:`NetworkXException` is raised.

    max_iter : int
      Maximum number of iterations of attempting swaps to find an improvement.

    Returns
    -------
    max_split : 2-tuple of sets of nodes
        Pair of sets of nodes of ``G``, partitioned according to a
        node swap greedy modularity maximization algorithm.

    Raises
    ------
    NetworkXError
      if init_split is not a valid partition of the
      graph into two communities or if G is a MultiGraph

    Examples
    --------
    >>> G = nx.barbell_graph(3, 0)
    >>> left, right = nx.community.greedy_node_swap_bipartition(G)
    >>> # Sort the communities so the nodes appear in increasing order.
    >>> left, right = sorted([sorted(left), sorted(right)])
    >>> sorted(left)
    [0, 1, 2]
    >>> sorted(right)
    [3, 4, 5]

    Notes
    -----
    This function is not implemented for multigraphs.

    References
    ----------
    .. [1] M. E. J. Newman "Networks: An Introduction", pages 373--375.
       Oxford University Press 2011.

    """
    if init_split is None:
        m1 = len(G) // 2
        m2 = len(G) - m1
        some_nodes = set(random.sample(list(G), m1))
        other_nodes = {n for n in G if n not in some_nodes}
        best_split_so_far = (some_nodes, other_nodes)
    else:
        if not nx.community.is_partition(G, init_split):
            raise nx.NetworkXError("init_split is not a partition of G")
        if not len(init_split) == 2:
            raise nx.NetworkXError("init_split must be a bipartition")
        best_split_so_far = deepcopy(init_split)

    best_mod = nx.community.modularity(G, best_split_so_far)

    max_split, max_mod = best_split_so_far, best_mod
    its = 0
    m = G.number_of_edges()
    G_degree = dict(G.degree)

    while max_mod >= best_mod and its < max_iter:
        best_split_so_far = max_split
        best_mod = max_mod
        next_split = deepcopy(max_split)
        next_mod = max_mod
        nodes = set(G)
        while nodes:
            max_swap = -1
            max_node = None
            max_node_comm = None
            left, right = next_split
            leftd = sum(G_degree[n] for n in left)
            rightd = sum(G_degree[n] for n in right)
            for n in nodes:
                if n in left:
                    in_comm, out_comm = left, right
                    in_deg, out_deg = leftd, rightd
                else:
                    in_comm, out_comm = right, left
                    in_deg, out_deg = rightd, leftd

                d_eii = -len(G[n].keys() & in_comm) / m
                d_ejj = len(G[n].keys() & out_comm) / m
                deg = G_degree[n]
                d_sum_ai = (deg / (2 * m**2)) * (in_deg - out_deg - deg)
                swap_change = d_eii + d_ejj + d_sum_ai

                if swap_change > max_swap:
                    max_swap = swap_change
                    max_node = n
                    max_node_comm = in_comm
                    non_max_node_comm = out_comm
            # swap the node from one comm to the other
            max_node_comm.remove(max_node)
            non_max_node_comm.add(max_node)
            next_mod += max_swap
            # deepcopy next_split each time it reaches a high (might go lower later)
            if next_mod > max_mod:
                max_split, max_mod = deepcopy(next_split), next_mod
            nodes.remove(max_node)
        its += 1
    return best_split_so_far