"""Functions for splitting a network into two communities (finding a bipartition)."""
import random
from copy import deepcopy
from itertools import count
import networkx as nx
__all__ = [
"kernighan_lin_bisection",
"spectral_modularity_bipartition",
"greedy_node_swap_bipartition",
]
def _kernighan_lin_sweep(edge_info, side):
"""
This is a modified form of Kernighan-Lin, which moves single nodes at a
time, alternating between sides to keep the bisection balanced. We keep
two min-heaps of swap costs to make optimal-next-move selection fast.
"""
heap0, heap1 = cost_heaps = nx.utils.BinaryHeap(), nx.utils.BinaryHeap()
# we use heap methods insert, pop, and get
for u, nbrs in edge_info.items():
cost_u = sum(wt if side[v] else -wt for v, wt in nbrs.items())
if side[u]:
heap1.insert(u, cost_u)
else:
heap0.insert(u, -cost_u)
def _update_heap_values(node):
side_node = side[node]
for nbr, wt in edge_info[node].items():
side_nbr = side[nbr]
if side_nbr == side_node:
wt = -wt
heap_nbr = cost_heaps[side_nbr]
if nbr in heap_nbr:
cost_nbr = heap_nbr.get(nbr) + 2 * wt
# allow_increase lets us update a value already on the heap
heap_nbr.insert(nbr, cost_nbr, allow_increase=True)
i = 0
totcost = 0
while heap0 and heap1:
u, cost_u = heap0.pop()
_update_heap_values(u)
v, cost_v = heap1.pop()
_update_heap_values(v)
totcost += cost_u + cost_v
i += 1
yield totcost, i, (u, v)
[docs]
@nx.utils.not_implemented_for("directed")
@nx.utils.py_random_state(4)
@nx._dispatchable(edge_attrs="weight")
def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
"""Partition a graph into two blocks using the Kernighan–Lin algorithm.
This algorithm partitions a network into two sets by iteratively
swapping pairs of nodes to reduce the edge cut between the two sets. The
pairs are chosen according to a modified form of Kernighan-Lin [1]_, which
moves nodes individually, alternating between sides to keep the bisection
balanced.
Kernighan-Lin is an approximate algorithm for maximal modularity bisection.
In [2]_ they suggest that fine-tuned improvements can be made using
greedy node swapping, (see `greedy_node_swap_bipartition`).
The improvements are typically only a few percent of the modularity value.
But they claim that can make a difference between a good and excellent method.
This function does not perform any improvements. But you can do that yourself.
Parameters
----------
G : NetworkX graph
Graph must be undirected.
partition : tuple
Pair of iterables containing an initial partition. If not
specified, a random balanced partition is used.
max_iter : int
Maximum number of times to attempt swaps to find an
improvement before giving up.
weight : string or function (default: "weight")
If this is a string, then edge weights will be accessed via the
edge attribute with this key (that is, the weight of the edge
joining `u` to `v` will be ``G.edges[u, v][weight]``). If no
such edge attribute exists, the weight of the edge is assumed to
be one.
If this is a function, the weight of an edge is the value
returned by the function. The function must accept exactly three
positional arguments: the two endpoints of an edge and the
dictionary of edge attributes for that edge. The function must
return a number or None to indicate a hidden edge.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Only used if partition is None
Returns
-------
partition : tuple
A pair of sets of nodes representing the bipartition.
Raises
------
NetworkXError
If `partition` is not a valid partition of the nodes of the graph.
References
----------
.. [1] Kernighan, B. W.; Lin, Shen (1970).
"An efficient heuristic procedure for partitioning graphs."
*Bell Systems Technical Journal* 49: 291--307.
Oxford University Press 2011.
.. [2] M. E. J. Newman,
"Modularity and community structure in networks",
PNAS, 103 (23), p. 8577-8582,
https://doi.org/10.1073/pnas.0601602103
"""
nodes = list(G)
if partition is None:
seed.shuffle(nodes)
mid = len(nodes) // 2
A, B = nodes[:mid], nodes[mid:]
else:
try:
A, B = partition
except (TypeError, ValueError) as err:
raise nx.NetworkXError("partition must be two sets") from err
if not nx.community.is_partition(G, [A, B]):
raise nx.NetworkXError("partition invalid")
side = {node: (node in A) for node in nodes}
# ruff: noqa: E731 skips check for no lambda functions
# Using shortest_paths _weight_function with sum instead of min on multiedges
if callable(weight):
sum_weight = weight
elif G.is_multigraph():
sum_weight = lambda u, v, d: sum(dd.get(weight, 1) for dd in d.values())
else:
sum_weight = lambda u, v, d: d.get(weight, 1)
edge_info = {
u: {v: wt for v, d in nbrs.items() if (wt := sum_weight(u, v, d)) is not None}
for u, nbrs in G._adj.items()
}
for i in range(max_iter):
costs = list(_kernighan_lin_sweep(edge_info, side))
# find out how many edges to update: min_i
min_cost, min_i, _ = min(costs)
if min_cost >= 0:
break
for _, _, (u, v) in costs[:min_i]:
side[u] = 1
side[v] = 0
part1 = {u for u, s in side.items() if s == 0}
part2 = {u for u, s in side.items() if s == 1}
return part1, part2
[docs]
@nx.utils.not_implemented_for("directed")
@nx.utils.not_implemented_for("multigraph")
def spectral_modularity_bipartition(G):
r"""Return a bipartition of the nodes based on the spectrum of the
modularity matrix of the graph.
This method calculates the eigenvector associated with the second
largest eigenvalue of the modularity matrix, where the modularity
matrix *B* is defined by
..math::
B_{i j} = A_{i j} - \frac{k_i k_j}{2 m},
where *A* is the adjacency matrix, `k_i` is the degree of node *i*,
and *m* is the number of edges in the graph. Nodes whose
corresponding values in the eigenvector are negative are placed in
one block, nodes whose values are nonnegative are placed in another
block.
Parameters
----------
G : NetworkX graph
Returns
-------
C : tuple
Pair of communities as two sets of nodes of ``G``, partitioned
according to second largest eigenvalue of the modularity matrix.
Examples
--------
>>> G = nx.karate_club_graph()
>>> MrHi, Officer = nx.community.spectral_modularity_bipartition(G)
>>> MrHi, Officer = sorted([sorted(MrHi), sorted(Officer)])
>>> MrHi
[0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21]
>>> Officer
[8, 9, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
References
----------
.. [1] M. E. J. Newman *Networks: An Introduction*, pages 373--378
Oxford University Press 2011.
.. [2] M. E. J. Newman,
"Modularity and community structure in networks",
PNAS, 103 (23), p. 8577-8582,
https://doi.org/10.1073/pnas.0601602103
"""
import numpy as np
B = nx.linalg.modularity_matrix(G)
eigenvalues, eigenvectors = np.linalg.eig(B)
index = np.argsort(eigenvalues)[-1]
v2 = zip(np.real(eigenvectors[:, index]), G)
left, right = set(), set()
for u, n in v2:
if u < 0:
left.add(n)
else:
right.add(n)
return left, right
[docs]
@nx.utils.not_implemented_for("multigraph")
def greedy_node_swap_bipartition(G, *, init_split=None, max_iter=10):
"""Split the nodes into two communities based on greedy
modularity maximization.
The algorithm works by selecting a node to change communities which
will maximize the modularity. The swap is made and the community
structure with the highest modularity is kept.
Parameters
----------
G : NetworkX graph
init_split : 2-tuple of sets of nodes
Pair of sets of nodes in ``G`` providing an initial bipartition
for the algorithm. If not specified, a random balanced partition
is used. If this pair of sets is not a partition of the nodes of `G`,
:exc:`NetworkXException` is raised.
max_iter : int
Maximum number of iterations of attempting swaps to find an improvement.
Returns
-------
max_split : 2-tuple of sets of nodes
Pair of sets of nodes of ``G``, partitioned according to a
node swap greedy modularity maximization algorithm.
Raises
------
NetworkXError
if init_split is not a valid partition of the
graph into two communities or if G is a MultiGraph
Examples
--------
>>> G = nx.barbell_graph(3, 0)
>>> left, right = nx.community.greedy_node_swap_bipartition(G)
>>> # Sort the communities so the nodes appear in increasing order.
>>> left, right = sorted([sorted(left), sorted(right)])
>>> sorted(left)
[0, 1, 2]
>>> sorted(right)
[3, 4, 5]
Notes
-----
This function is not implemented for multigraphs.
References
----------
.. [1] M. E. J. Newman "Networks: An Introduction", pages 373--375.
Oxford University Press 2011.
"""
if init_split is None:
m1 = len(G) // 2
m2 = len(G) - m1
some_nodes = set(random.sample(list(G), m1))
other_nodes = {n for n in G if n not in some_nodes}
best_split_so_far = (some_nodes, other_nodes)
else:
if not nx.community.is_partition(G, init_split):
raise nx.NetworkXError("init_split is not a partition of G")
if not len(init_split) == 2:
raise nx.NetworkXError("init_split must be a bipartition")
best_split_so_far = deepcopy(init_split)
best_mod = nx.community.modularity(G, best_split_so_far)
max_split, max_mod = best_split_so_far, best_mod
its = 0
m = G.number_of_edges()
G_degree = dict(G.degree)
while max_mod >= best_mod and its < max_iter:
best_split_so_far = max_split
best_mod = max_mod
next_split = deepcopy(max_split)
next_mod = max_mod
nodes = set(G)
while nodes:
max_swap = -1
max_node = None
max_node_comm = None
left, right = next_split
leftd = sum(G_degree[n] for n in left)
rightd = sum(G_degree[n] for n in right)
for n in nodes:
if n in left:
in_comm, out_comm = left, right
in_deg, out_deg = leftd, rightd
else:
in_comm, out_comm = right, left
in_deg, out_deg = rightd, leftd
d_eii = -len(G[n].keys() & in_comm) / m
d_ejj = len(G[n].keys() & out_comm) / m
deg = G_degree[n]
d_sum_ai = (deg / (2 * m**2)) * (in_deg - out_deg - deg)
swap_change = d_eii + d_ejj + d_sum_ai
if swap_change > max_swap:
max_swap = swap_change
max_node = n
max_node_comm = in_comm
non_max_node_comm = out_comm
# swap the node from one comm to the other
max_node_comm.remove(max_node)
non_max_node_comm.add(max_node)
next_mod += max_swap
# deepcopy next_split each time it reaches a high (might go lower later)
if next_mod > max_mod:
max_split, max_mod = deepcopy(next_split), next_mod
nodes.remove(max_node)
its += 1
return best_split_so_far