Source code for networkx.algorithms.community.bipartitions

"""Functions for splitting a network into two communities (finding a bipartition)."""

import random
from copy import deepcopy
from itertools import count

import networkx as nx

__all__ = [
    "kernighan_lin_bisection",
    "spectral_modularity_bipartition",
    "greedy_node_swap_bipartition",
]


def _kernighan_lin_sweep(edge_info, side):
    """
    This is a modified form of Kernighan-Lin, which moves single nodes at a
    time, alternating between sides to keep the bisection balanced.  We keep
    two min-heaps of swap costs to make optimal-next-move selection fast.
    """
    heap0, heap1 = cost_heaps = nx.utils.BinaryHeap(), nx.utils.BinaryHeap()
    # we use heap methods insert, pop, and get
    for u, nbrs in edge_info.items():
        cost_u = sum(wt if side[v] else -wt for v, wt in nbrs.items())
        if side[u]:
            heap1.insert(u, cost_u)
        else:
            heap0.insert(u, -cost_u)

    def _update_heap_values(node):
        side_node = side[node]
        for nbr, wt in edge_info[node].items():
            side_nbr = side[nbr]
            if side_nbr == side_node:
                wt = -wt
            heap_nbr = cost_heaps[side_nbr]
            if nbr in heap_nbr:
                cost_nbr = heap_nbr.get(nbr) + 2 * wt
                # allow_increase lets us update a value already on the heap
                heap_nbr.insert(nbr, cost_nbr, allow_increase=True)

    i = 0
    totcost = 0
    while heap0 and heap1:
        u, cost_u = heap0.pop()
        _update_heap_values(u)
        v, cost_v = heap1.pop()
        _update_heap_values(v)
        totcost += cost_u + cost_v
        i += 1
        yield totcost, i, (u, v)


[docs] @nx.utils.not_implemented_for("directed") @nx.utils.py_random_state(4) @nx._dispatchable(edge_attrs="weight") def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None): """Partition a graph into two blocks using the Kernighan–Lin algorithm. This algorithm partitions a network into two sets by iteratively swapping pairs of nodes to reduce the edge cut between the two sets. The pairs are chosen according to a modified form of Kernighan-Lin [1]_, which moves nodes individually, alternating between sides to keep the bisection balanced. Kernighan-Lin is an approximate algorithm for maximal modularity bisection. In [2]_ they suggest that fine-tuned improvements can be made using greedy node swapping, (see `greedy_node_swap_bipartition`). The improvements are typically only a few percent of the modularity value. But they claim that can make a difference between a good and excellent method. This function does not perform any improvements. But you can do that yourself. Parameters ---------- G : NetworkX graph Graph must be undirected. partition : tuple Pair of iterables containing an initial partition. If not specified, a random balanced partition is used. max_iter : int Maximum number of times to attempt swaps to find an improvement before giving up. weight : string or function (default: "weight") If this is a string, then edge weights will be accessed via the edge attribute with this key (that is, the weight of the edge joining `u` to `v` will be ``G.edges[u, v][weight]``). If no such edge attribute exists, the weight of the edge is assumed to be one. If this is a function, the weight of an edge is the value returned by the function. The function must accept exactly three positional arguments: the two endpoints of an edge and the dictionary of edge attributes for that edge. The function must return a number or None to indicate a hidden edge. seed : integer, random_state, or None (default) Indicator of random number generation state. See :ref:`Randomness<randomness>`. Only used if partition is None Returns ------- partition : tuple A pair of sets of nodes representing the bipartition. Raises ------ NetworkXError If `partition` is not a valid partition of the nodes of the graph. References ---------- .. [1] Kernighan, B. W.; Lin, Shen (1970). "An efficient heuristic procedure for partitioning graphs." *Bell Systems Technical Journal* 49: 291--307. Oxford University Press 2011. .. [2] M. E. J. Newman, "Modularity and community structure in networks", PNAS, 103 (23), p. 8577-8582, https://doi.org/10.1073/pnas.0601602103 """ nodes = list(G) if partition is None: seed.shuffle(nodes) mid = len(nodes) // 2 A, B = nodes[:mid], nodes[mid:] else: try: A, B = partition except (TypeError, ValueError) as err: raise nx.NetworkXError("partition must be two sets") from err if not nx.community.is_partition(G, [A, B]): raise nx.NetworkXError("partition invalid") side = {node: (node in A) for node in nodes} # ruff: noqa: E731 skips check for no lambda functions # Using shortest_paths _weight_function with sum instead of min on multiedges if callable(weight): sum_weight = weight elif G.is_multigraph(): sum_weight = lambda u, v, d: sum(dd.get(weight, 1) for dd in d.values()) else: sum_weight = lambda u, v, d: d.get(weight, 1) edge_info = { u: {v: wt for v, d in nbrs.items() if (wt := sum_weight(u, v, d)) is not None} for u, nbrs in G._adj.items() } for i in range(max_iter): costs = list(_kernighan_lin_sweep(edge_info, side)) # find out how many edges to update: min_i min_cost, min_i, _ = min(costs) if min_cost >= 0: break for _, _, (u, v) in costs[:min_i]: side[u] = 1 side[v] = 0 part1 = {u for u, s in side.items() if s == 0} part2 = {u for u, s in side.items() if s == 1} return part1, part2
[docs] @nx.utils.not_implemented_for("directed") @nx.utils.not_implemented_for("multigraph") def spectral_modularity_bipartition(G): r"""Return a bipartition of the nodes based on the spectrum of the modularity matrix of the graph. This method calculates the eigenvector associated with the second largest eigenvalue of the modularity matrix, where the modularity matrix *B* is defined by ..math:: B_{i j} = A_{i j} - \frac{k_i k_j}{2 m}, where *A* is the adjacency matrix, `k_i` is the degree of node *i*, and *m* is the number of edges in the graph. Nodes whose corresponding values in the eigenvector are negative are placed in one block, nodes whose values are nonnegative are placed in another block. Parameters ---------- G : NetworkX graph Returns ------- C : tuple Pair of communities as two sets of nodes of ``G``, partitioned according to second largest eigenvalue of the modularity matrix. Examples -------- >>> G = nx.karate_club_graph() >>> MrHi, Officer = nx.community.spectral_modularity_bipartition(G) >>> MrHi, Officer = sorted([sorted(MrHi), sorted(Officer)]) >>> MrHi [0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21] >>> Officer [8, 9, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] References ---------- .. [1] M. E. J. Newman *Networks: An Introduction*, pages 373--378 Oxford University Press 2011. .. [2] M. E. J. Newman, "Modularity and community structure in networks", PNAS, 103 (23), p. 8577-8582, https://doi.org/10.1073/pnas.0601602103 """ import numpy as np B = nx.linalg.modularity_matrix(G) eigenvalues, eigenvectors = np.linalg.eig(B) index = np.argsort(eigenvalues)[-1] v2 = zip(np.real(eigenvectors[:, index]), G) left, right = set(), set() for u, n in v2: if u < 0: left.add(n) else: right.add(n) return left, right
[docs] @nx.utils.not_implemented_for("multigraph") def greedy_node_swap_bipartition(G, *, init_split=None, max_iter=10): """Split the nodes into two communities based on greedy modularity maximization. The algorithm works by selecting a node to change communities which will maximize the modularity. The swap is made and the community structure with the highest modularity is kept. Parameters ---------- G : NetworkX graph init_split : 2-tuple of sets of nodes Pair of sets of nodes in ``G`` providing an initial bipartition for the algorithm. If not specified, a random balanced partition is used. If this pair of sets is not a partition of the nodes of `G`, :exc:`NetworkXException` is raised. max_iter : int Maximum number of iterations of attempting swaps to find an improvement. Returns ------- max_split : 2-tuple of sets of nodes Pair of sets of nodes of ``G``, partitioned according to a node swap greedy modularity maximization algorithm. Raises ------ NetworkXError if init_split is not a valid partition of the graph into two communities or if G is a MultiGraph Examples -------- >>> G = nx.barbell_graph(3, 0) >>> left, right = nx.community.greedy_node_swap_bipartition(G) >>> # Sort the communities so the nodes appear in increasing order. >>> left, right = sorted([sorted(left), sorted(right)]) >>> sorted(left) [0, 1, 2] >>> sorted(right) [3, 4, 5] Notes ----- This function is not implemented for multigraphs. References ---------- .. [1] M. E. J. Newman "Networks: An Introduction", pages 373--375. Oxford University Press 2011. """ if init_split is None: m1 = len(G) // 2 m2 = len(G) - m1 some_nodes = set(random.sample(list(G), m1)) other_nodes = {n for n in G if n not in some_nodes} best_split_so_far = (some_nodes, other_nodes) else: if not nx.community.is_partition(G, init_split): raise nx.NetworkXError("init_split is not a partition of G") if not len(init_split) == 2: raise nx.NetworkXError("init_split must be a bipartition") best_split_so_far = deepcopy(init_split) best_mod = nx.community.modularity(G, best_split_so_far) max_split, max_mod = best_split_so_far, best_mod its = 0 m = G.number_of_edges() G_degree = dict(G.degree) while max_mod >= best_mod and its < max_iter: best_split_so_far = max_split best_mod = max_mod next_split = deepcopy(max_split) next_mod = max_mod nodes = set(G) while nodes: max_swap = -1 max_node = None max_node_comm = None left, right = next_split leftd = sum(G_degree[n] for n in left) rightd = sum(G_degree[n] for n in right) for n in nodes: if n in left: in_comm, out_comm = left, right in_deg, out_deg = leftd, rightd else: in_comm, out_comm = right, left in_deg, out_deg = rightd, leftd d_eii = -len(G[n].keys() & in_comm) / m d_ejj = len(G[n].keys() & out_comm) / m deg = G_degree[n] d_sum_ai = (deg / (2 * m**2)) * (in_deg - out_deg - deg) swap_change = d_eii + d_ejj + d_sum_ai if swap_change > max_swap: max_swap = swap_change max_node = n max_node_comm = in_comm non_max_node_comm = out_comm # swap the node from one comm to the other max_node_comm.remove(max_node) non_max_node_comm.add(max_node) next_mod += max_swap # deepcopy next_split each time it reaches a high (might go lower later) if next_mod > max_mod: max_split, max_mod = deepcopy(next_split), next_mod nodes.remove(max_node) its += 1 return best_split_so_far