"""Current-flow betweenness centrality measures."""
import networkx as nx
from networkx.algorithms.centrality.flow_matrix import (
CGInverseLaplacian,
flow_matrix_row,
FullInverseLaplacian,
SuperLUInverseLaplacian,
)
from networkx.utils import (
not_implemented_for,
reverse_cuthill_mckee_ordering,
py_random_state,
)
# Public names of this module; these are what ``from <module> import *`` exports.
__all__ = [
"current_flow_betweenness_centrality",
"approximate_current_flow_betweenness_centrality",
"edge_current_flow_betweenness_centrality",
]
@py_random_state(7)
@not_implemented_for("directed")
def approximate_current_flow_betweenness_centrality(
    G,
    normalized=True,
    weight=None,
    dtype=float,
    solver="full",
    epsilon=0.5,
    kmax=10000,
    seed=None,
):
    r"""Compute the approximate current-flow betweenness centrality for nodes.

    Approximates the current-flow betweenness centrality within absolute
    error of epsilon with high probability [1]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    normalized : bool, optional (default=True)
      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
      n is the number of nodes in G.

    weight : string or None, optional (default=None)
      Key for edge data used as the edge weight.
      If None, then use 1 as each edge weight.
      The weight reflects the capacity or the strength of the
      edge.

    dtype : data type (float)
      Default data type for internal matrices.
      Set to np.float32 for lower memory consumption.

    solver : string (default='full')
       Type of linear solver to use for computing the flow matrix.
       Options are "full" (uses most memory), "lu" (recommended), and
       "cg" (uses least memory).

    epsilon: float
        Absolute error tolerance.

    kmax: int
       Maximum number of sample node pairs to use for approximation.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with betweenness centrality as the value.

    Raises
    ------
    NetworkXError
        If `G` is not connected, or if the number of random node pairs
        required for the requested `epsilon` exceeds `kmax`.

    See Also
    --------
    current_flow_betweenness_centrality

    Notes
    -----
    The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
    and the space required is $O(m)$ for $n$ nodes and $m$ edges.

    If the edges have a 'weight' attribute they will be used as
    weights in this algorithm.  Unspecified weights are set to 1.

    References
    ----------
    .. [1] Ulrik Brandes and Daniel Fleischer:
       Centrality Measures Based on Current Flow.
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
       https://doi.org/10.1007/978-3-540-31856-9_44
    """
    import numpy as np

    if not nx.is_connected(G):
        raise nx.NetworkXError("Graph not connected.")
    solvername = {
        "full": FullInverseLaplacian,
        "lu": SuperLUInverseLaplacian,
        "cg": CGInverseLaplacian,
    }
    n = G.number_of_nodes()
    ordering = list(reverse_cuthill_mckee_ordering(G))
    # make a copy with integer labels according to rcm ordering
    # this could be done without a copy if we really wanted to
    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
    L = nx.laplacian_matrix(H, nodelist=range(n), weight=weight).asformat("csc")
    L = L.astype(dtype)
    C = solvername[solver](L, dtype=dtype)  # initialize solver
    betweenness = dict.fromkeys(H, 0.0)
    nb = (n - 1.0) * (n - 2.0)  # normalization factor
    cstar = n * (n - 1) / nb
    l = 1  # parameter in approximation, adjustable
    # number of random (s, t) pairs to sample for the requested tolerance
    k = l * int(np.ceil((cstar / epsilon) ** 2 * np.log(n)))
    if k > kmax:
        # Single message string: passing two positional arguments makes the
        # exception render as a tuple instead of readable text.
        raise nx.NetworkXError(
            f"Number random pairs k>kmax ({k}>{kmax}). Increase kmax or epsilon"
        )
    cstar2k = cstar / (2 * k)
    # Edge weights do not change between samples, so look them up once
    # instead of once per sampled pair.
    weighted_nbrs = {
        v: [(nbr, data.get(weight, 1.0)) for nbr, data in H.adj[v].items()]
        for v in H
    }
    for _ in range(k):
        # `seed` is used as a random-number generator object here
        # (presumably normalized by the py_random_state decorator).
        s, t = seed.sample(range(n), 2)
        # unit current injected at s and extracted at t
        b = np.zeros(n, dtype=dtype)
        b[s] = 1
        b[t] = -1
        p = C.solve(b)
        for v, nbrs in weighted_nbrs.items():
            if v == s or v == t:
                continue
            for nbr, w in nbrs:
                betweenness[v] += w * np.abs(p[v] - p[nbr]) * cstar2k
    if normalized:
        factor = 1.0
    else:
        factor = nb / 2.0
    # remap to original node names and "unnormalize" if required
    return {ordering[k]: float(v * factor) for k, v in betweenness.items()}
@not_implemented_for("directed")
def current_flow_betweenness_centrality(
    G, normalized=True, weight=None, dtype=float, solver="full"
):
    r"""Compute current-flow betweenness centrality for nodes.

    Current-flow betweenness centrality uses an electrical current
    model for information spreading in contrast to betweenness
    centrality which uses shortest paths.

    Current-flow betweenness centrality is also known as
    random-walk betweenness centrality [2]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    normalized : bool, optional (default=True)
      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
      n is the number of nodes in G.

    weight : string or None, optional (default=None)
      Key for edge data used as the edge weight.
      If None, then use 1 as each edge weight.
      The weight reflects the capacity or the strength of the
      edge.

    dtype : data type (float)
      Default data type for internal matrices.
      Set to np.float32 for lower memory consumption.

    solver : string (default='full')
       Type of linear solver to use for computing the flow matrix.
       Options are "full" (uses most memory), "lu" (recommended), and
       "cg" (uses least memory).

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with betweenness centrality as the value.

    Raises
    ------
    NetworkXError
        If `G` is not connected.

    See Also
    --------
    approximate_current_flow_betweenness_centrality
    betweenness_centrality
    edge_betweenness_centrality
    edge_current_flow_betweenness_centrality

    Notes
    -----
    Current-flow betweenness can be computed in  $O(I(n-1)+mn \log n)$
    time [1]_, where $I(n-1)$ is the time needed to compute the
    inverse Laplacian.  For a full matrix this is $O(n^3)$ but using
    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
    Laplacian matrix condition number.

    The space required is $O(nw)$ where $w$ is the width of the sparse
    Laplacian matrix.  Worst case is $w=n$ for $O(n^2)$.

    If the edges have a 'weight' attribute they will be used as
    weights in this algorithm.  Unspecified weights are set to 1.

    References
    ----------
    .. [1] Centrality Measures Based on Current Flow.
       Ulrik Brandes and Daniel Fleischer,
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
       https://doi.org/10.1007/978-3-540-31856-9_44

    .. [2] A measure of betweenness centrality based on random walks,
       M. E. J. Newman, Social Networks 27, 39-54 (2005).
    """
    if not nx.is_connected(G):
        raise nx.NetworkXError("Graph not connected.")
    n = G.number_of_nodes()
    ordering = list(reverse_cuthill_mckee_ordering(G))
    # make a copy with integer labels according to rcm ordering
    # this could be done without a copy if we really wanted to
    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
    betweenness = dict.fromkeys(H, 0.0)  # b[v]=0 for v in H
    for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
        # rank of each entry when the row is sorted in decreasing order
        pos = dict(zip(row.argsort()[::-1], range(n)))
        for i in range(n):
            betweenness[s] += (i - pos[i]) * row[i]
            betweenness[t] += (n - i - 1 - pos[i]) * row[i]
    if normalized:
        nb = (n - 1.0) * (n - 2.0)  # normalization factor
    else:
        nb = 2.0
    # Nodes of H are the integers 0..n-1, so `v` is both the integer label
    # subtracted from the accumulated value and the index into `ordering`
    # that restores the original node name.
    return {
        ordering[v]: float((b - v) * 2.0 / nb) for v, b in betweenness.items()
    }
@not_implemented_for("directed")
def edge_current_flow_betweenness_centrality(
    G, normalized=True, weight=None, dtype=float, solver="full"
):
    r"""Compute current-flow betweenness centrality for edges.

    Current-flow betweenness centrality uses an electrical current
    model for information spreading in contrast to betweenness
    centrality which uses shortest paths.

    Current-flow betweenness centrality is also known as
    random-walk betweenness centrality [2]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    normalized : bool, optional (default=True)
      If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
      n is the number of nodes in G.

    weight : string or None, optional (default=None)
      Key for edge data used as the edge weight.
      If None, then use 1 as each edge weight.
      The weight reflects the capacity or the strength of the
      edge.

    dtype : data type (default=float)
      Default data type for internal matrices.
      Set to np.float32 for lower memory consumption.

    solver : string (default='full')
       Type of linear solver to use for computing the flow matrix.
       Options are "full" (uses most memory), "lu" (recommended), and
       "cg" (uses least memory).

    Returns
    -------
    edges : dictionary
       Dictionary of edge tuples with betweenness centrality as the value.

    Raises
    ------
    NetworkXError
        The algorithm does not support DiGraphs.
        If the input graph is an instance of DiGraph class, NetworkXError
        is raised.
        Also raised if `G` is not connected.

    See Also
    --------
    betweenness_centrality
    edge_betweenness_centrality
    current_flow_betweenness_centrality

    Notes
    -----
    Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
    time [1]_, where $I(n-1)$ is the time needed to compute the
    inverse Laplacian.  For a full matrix this is $O(n^3)$ but using
    sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
    Laplacian matrix condition number.

    The space required is $O(nw)$ where $w$ is the width of the sparse
    Laplacian matrix.  Worst case is $w=n$ for $O(n^2)$.

    If the edges have a 'weight' attribute they will be used as
    weights in this algorithm.  Unspecified weights are set to 1.

    References
    ----------
    .. [1] Centrality Measures Based on Current Flow.
       Ulrik Brandes and Daniel Fleischer,
       Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
       LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
       https://doi.org/10.1007/978-3-540-31856-9_44

    .. [2] A measure of betweenness centrality based on random walks,
       M. E. J. Newman, Social Networks 27, 39-54 (2005).
    """
    if not nx.is_connected(G):
        raise nx.NetworkXError("Graph not connected.")
    n = G.number_of_nodes()
    ordering = list(reverse_cuthill_mckee_ordering(G))
    # make a copy with integer labels according to rcm ordering
    # this could be done without a copy if we really wanted to
    H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
    # one accumulator per edge, keyed by the sorted pair of integer labels
    edges = (tuple(sorted((u, v))) for u, v in H.edges())
    betweenness = dict.fromkeys(edges, 0.0)
    if normalized:
        nb = (n - 1.0) * (n - 2.0)  # normalization factor
    else:
        nb = 2.0
    for row, e in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
        # 1-based rank of each entry when the row is sorted in decreasing order
        pos = dict(zip(row.argsort()[::-1], range(1, n + 1)))
        for i in range(n):
            betweenness[e] += (i + 1 - pos[i]) * row[i]
            betweenness[e] += (n - i - pos[i]) * row[i]
        betweenness[e] /= nb
    # map the integer labels back to the original node names
    return {(ordering[s], ordering[t]): float(v) for (s, t), v in betweenness.items()}