r"""Computation of graph non-randomness."""
import math
import networkx as nx
from networkx.utils import not_implemented_for
__all__ = ["non_randomness"]
[docs]
@not_implemented_for("directed")
@not_implemented_for("multigraph")
@nx._dispatchable(edge_attrs="weight")
def non_randomness(G, k=None, weight="weight"):
    """Compute the non-randomness of a graph.

    The first value $R_G$ is the sum of non-randomness values of all
    edges within the graph (where the non-randomness of an edge tends to be
    small when the two nodes linked by that edge are from two different
    communities).

    The second value $R_G^*$ is a relative measure that indicates
    to what extent `G` is different from a random graph in terms
    of probability. The closer it is to 0, the higher the likelihood
    the graph was generated by an Erdős--Rényi model.

    Parameters
    ----------
    G : NetworkX graph
        Graph must be undirected, connected, and without self-loops.

    k : int or None, optional (default=None)
        The number of communities in `G`.
        If `k` is not set, the function uses a default community detection
        algorithm (:func:`~networkx.algorithms.community.label_propagation_communities`)
        to set it.

    weight : string or None, optional (default="weight")
        The name of an edge attribute that holds the numerical value used
        as a weight. If `None`, then each edge has weight 1, i.e., the graph is
        binary.

    Returns
    -------
    (float, float) tuple
        The first value is $R_G$, the non-randomness of the graph,
        the second is $R_G^*$, the relative non-randomness
        w.r.t. the Erdős--Rényi model.

    Raises
    ------
    NetworkXNotImplemented
        If the input graph is directed or a multigraph.

    NetworkXException
        If the input graph is not connected.

    NetworkXError
        If the input graph contains self-loops or has no edges.

    ValueError
        If `k` is not in $\\{1, \\dots, n-1\\}$, where $n$ is the number of nodes,
        or if `k` is such that the computed edge probability
        $p = \\frac{2km}{n(n-k)}$ does not satisfy $0 < p < 1$.

    Examples
    --------
    >>> G = nx.karate_club_graph()
    >>> nr, nr_rd = nx.non_randomness(G, 2)
    >>> nr, nr_rd = nx.non_randomness(G, 2, "weight")

    When the number of communities `k` is not specified,
    :func:`~networkx.algorithms.community.label_propagation_communities`
    is used to compute it.
    This algorithm can give different results depending on
    the order of nodes and edges in the graph.
    For example, while the following graphs are identical,
    computing the non-randomness of each of them yields different results:

    >>> G1, G2 = nx.Graph(), nx.Graph()
    >>> G1.add_edges_from([(0, 1), (1, 2), (1, 3), (3, 4)])
    >>> G2.add_edges_from([(0, 1), (1, 3), (1, 2), (3, 4)])
    >>> [round(r, 6) for r in nx.non_randomness(G1)]
    [1.847759, -0.508416]
    >>> [round(r, 6) for r in nx.non_randomness(G2)]
    Traceback (most recent call last):
     ...
    ValueError: invalid number of communities for graph with 5 nodes and 4 edges: 2

    This is because the community detection algorithm finds
    1 community in `G1` and 2 communities in `G2`.
    This can be resolved by specifying the number of communities `k`:

    >>> [round(r, 6) for r in nx.non_randomness(G2, k=1)]
    [1.847759, -0.508416]

    Notes
    -----
    If a `weight` argument is passed, this algorithm will use the eigenvalues
    of the weighted adjacency matrix instead.

    $R_G$ is the sum of the `k` largest eigenvalues of the (symmetric)
    adjacency matrix of `G`.

    The output of this function corresponds to (4.4) and (4.5) in [1]_.
    A lower value of $R^*_G$ indicates a more random graph;
    one can think of $1 - \\Phi(R_G^*)$ as the similarity
    between the graph and a random graph,
    where $\\Phi(x)$ is the cumulative distribution function
    of the standard normal distribution.

    Theorem 2 in [2]_ states that for any graph $G$
    with $n$ nodes, $m$ edges, and $k$ communities,
    its non-randomness is bounded below by the non-randomness of an
    $r$-regular graph (a graph where each node has degree $r$),
    and bounded above by the non-randomness of an $l$-complete graph
    (a graph where each community is a clique of $l$ nodes).

    References
    ----------
    .. [1] Xiaowei Ying and Xintao Wu,
           On Randomness Measures for Social Networks,
           SIAM International Conference on Data Mining. 2009
           https://doi.org/10.1137/1.9781611972795.61
    .. [2] Ying, Xiaowei & Wu, Leting & Wu, Xintao. (2012).
           A Spectrum-Based Framework for Quantifying Randomness of Social Networks.
           IEEE Transactions on Knowledge and Data Engineering 23(12):1842--1856.
           https://dl.acm.org/doi/abs/10.1109/TKDE.2010.218
    """
    import numpy as np

    # corner case: graph has no edges
    if nx.is_empty(G):
        raise nx.NetworkXError("non_randomness not applicable to empty graphs")
    if not nx.is_connected(G):
        raise nx.NetworkXException("Non connected graph.")
    # Count self-loops lazily instead of materializing the edge list.
    if nx.number_of_selfloops(G) > 0:
        raise nx.NetworkXError("Graph must not contain self-loops")

    n = G.number_of_nodes()
    m = G.number_of_edges()

    if k is None:
        k = len(tuple(nx.community.label_propagation_communities(G)))
    # `k < n` is checked first so the denominator `n - k` below is never zero;
    # `p` is the edge probability of the reference random model and must be a
    # proper probability for eq. 4.5 to be defined.
    if not 1 <= k < n or not 0 < (p := (2 * k * m) / (n * (n - k))) < 1:
        err = (
            f"invalid number of communities for graph with {n} nodes and {m} edges: {k}"
        )
        raise ValueError(err)

    # eq. 4.4: sum of the k largest eigenvalues of the adjacency matrix.
    # The adjacency matrix of an undirected graph is symmetric, so `eigvalsh`
    # applies; it returns real eigenvalues sorted in ascending order, which
    # makes the k largest the last k entries. (`eigvals` gives no ordering
    # guarantee, so slicing its output does not reliably pick the top k.)
    eigenvalues = np.linalg.eigvalsh(nx.to_numpy_array(G, weight=weight))
    nr = float(np.sum(eigenvalues[-k:]))

    # eq. 4.5: standardize against the expectation under the random model.
    nr_rd = (nr - ((n - 2 * k) * p + k)) / math.sqrt(2 * k * p * (1 - p))

    return nr, nr_rd