# Source code for networkx.algorithms.community.quality

"""Functions for measuring the quality of a partition (into
communities).

"""

from itertools import combinations

import networkx as nx
from networkx import NetworkXError
from networkx.algorithms.community.community_utils import is_partition
from networkx.utils import not_implemented_for
from networkx.utils.decorators import argmap

__all__ = ["modularity", "partition_quality"]

class NotAPartition(NetworkXError):
"""Raised if a given collection is not a partition."""

def __init__(self, G, collection):
msg = f"{collection} is not a valid partition of the graph {G}"
super().__init__(msg)

def _require_partition(G, partition):
"""Decorator to check that a valid partition is input to a function

Raises :exc:networkx.NetworkXError if the partition is not valid.

This decorator should be used on functions whose first two arguments
are a graph and a partition of the nodes of that graph (in that
order)::

>>> @require_partition
... def foo(G, partition):
...     print("partition is valid!")
...
>>> G = nx.complete_graph(5)
>>> partition = [{0, 1}, {2, 3}, {4}]
>>> foo(G, partition)
partition is valid!
>>> partition = [{0}, {2, 3}, {4}]
>>> foo(G, partition)
Traceback (most recent call last):
...
networkx.exception.NetworkXError: partition is not a valid partition of the nodes of G
>>> partition = [{0, 1}, {1, 2, 3}, {4}]
>>> foo(G, partition)
Traceback (most recent call last):
...
networkx.exception.NetworkXError: partition is not a valid partition of the nodes of G

"""
if is_partition(G, partition):
return G, partition
raise nx.NetworkXError("partition is not a valid partition of the nodes of G")

require_partition = argmap(_require_partition, (0, 1))

@nx._dispatch
def intra_community_edges(G, partition):
"""Returns the number of intra-community edges for a partition of G.

Parameters
----------
G : NetworkX graph.

partition : iterable of sets of nodes
This must be a partition of the nodes of G.

The "intra-community edges" are those edges joining a pair of nodes
in the same block of the partition.

"""
return sum(G.subgraph(block).size() for block in partition)

@nx._dispatch
def inter_community_edges(G, partition):
"""Returns the number of inter-community edges for a partition of G.
according to the given
partition of the nodes of G.

Parameters
----------
G : NetworkX graph.

partition : iterable of sets of nodes
This must be a partition of the nodes of G.

The *inter-community edges* are those edges joining a pair of nodes
in different blocks of the partition.

Implementation note: this function creates an intermediate graph
that may require the same amount of memory as that of G.

"""
# Alternate implementation that does not require constructing a new
# graph object (but does require constructing an affiliation
# dictionary):
#
#     aff = dict(chain.from_iterable(((v, block) for v in block)
#                                    for block in partition))
#     return sum(1 for u, v in G.edges() if aff[u] != aff[v])
#
MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
return nx.quotient_graph(G, partition, create_using=MG).size()

def inter_community_non_edges(G, partition):
"""Returns the number of inter-community non-edges according to the
given partition of the nodes of G.

Parameters
----------
G : NetworkX graph.

partition : iterable of sets of nodes
This must be a partition of the nodes of G.

A *non-edge* is a pair of nodes (undirected if G is undirected)
that are not adjacent in G. The *inter-community non-edges* are
those non-edges on a pair of nodes in different blocks of the
partition.

Implementation note: this function creates two intermediate graphs,
which may require up to twice the amount of memory as required to
store G.

"""
# Alternate implementation that does not require constructing two
# new graph objects (but does require constructing an affiliation
# dictionary):
#
#     aff = dict(chain.from_iterable(((v, block) for v in block)
#                                    for block in partition))
#     return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v])
#
return inter_community_edges(nx.complement(G), partition)

[docs]def modularity(G, communities, weight="weight", resolution=1):
r"""Returns the modularity of the given partition of the graph.

Modularity is defined in [1]_ as

.. math::
Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
\delta(c_i,c_j)

where $m$ is the number of edges, $A$ is the adjacency matrix of G,
$k_i$ is the degree of $i$, $\gamma$ is the resolution parameter,
and $\delta(c_i, c_j)$ is 1 if $i$ and $j$ are in the same community else 0.

According to [2]_ (and verified by some algebra) this can be reduced to

.. math::
Q = \sum_{c=1}^{n}
\left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]

where the sum iterates over all communities $c$, $m$ is the number of edges,
$L_c$ is the number of intra-community links for community $c$,
$k_c$ is the sum of degrees of the nodes in community $c$,
and $\gamma$ is the resolution parameter.

The resolution parameter sets an arbitrary tradeoff between intra-group
edges and inter-group edges. More complex grouping patterns can be
discovered by analyzing the same network with multiple values of gamma
and then combining the results [3]_. That said, it is very common to
simply use gamma=1. More on the choice of gamma is in [4]_.

The second formula is the one actually used in calculation of the modularity.
For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.

Parameters
----------
G : NetworkX Graph

communities : list or iterable of set of nodes
These node sets must represent a partition of G's nodes.

weight : string or None, optional (default="weight")
The edge attribute that holds the numerical value used
as a weight. If None or an edge does not have that attribute,
then that edge has weight 1.

resolution : float (default=1)
If resolution is less than 1, modularity favors larger communities.
Greater than 1 favors smaller communities.

Returns
-------
Q : float
The modularity of the partition.

Raises
------
NotAPartition
If communities is not a partition of the nodes of G.

Examples
--------
>>> import networkx.algorithms.community as nx_comm
>>> G = nx.barbell_graph(3, 0)
>>> nx_comm.modularity(G, [{0, 1, 2}, {3, 4, 5}])
0.35714285714285715
>>> nx_comm.modularity(G, nx_comm.label_propagation_communities(G))
0.35714285714285715

References
----------
.. [1] M. E. J. Newman "Networks: An Introduction", page 224.
Oxford University Press, 2011.
.. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
"Finding community structure in very large networks."
Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
.. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
.. [4] M. E. J. Newman, "Equivalence between modularity optimization and
maximum likelihood methods for community detection"
Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315

"""
if not isinstance(communities, list):
communities = list(communities)
if not is_partition(G, communities):
raise NotAPartition(G, communities)

directed = G.is_directed()
if directed:
out_degree = dict(G.out_degree(weight=weight))
in_degree = dict(G.in_degree(weight=weight))
m = sum(out_degree.values())
norm = 1 / m**2
else:
out_degree = in_degree = dict(G.degree(weight=weight))
deg_sum = sum(out_degree.values())
m = deg_sum / 2
norm = 1 / deg_sum**2

def community_contribution(community):
comm = set(community)
L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)

out_degree_sum = sum(out_degree[u] for u in comm)
in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum

return L_c / m - resolution * out_degree_sum * in_degree_sum * norm

return sum(map(community_contribution, communities))

[docs]@require_partition
def partition_quality(G, partition):
"""Returns the coverage and performance of a partition of G.

The *coverage* of a partition is the ratio of the number of
intra-community edges to the total number of edges in the graph.

The *performance* of a partition is the number of
intra-community edges plus inter-community non-edges divided by the total
number of potential edges.

This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links.

Parameters
----------
G : NetworkX graph

partition : sequence
Partition of the nodes of G, represented as a sequence of
sets of nodes (blocks). Each block of the partition represents a
community.

Returns
-------
(float, float)
The (coverage, performance) tuple of the partition, as defined above.

Raises
------
NetworkXError
If partition is not a valid partition of the nodes of G.

Notes
-----
If G is a multigraph;
- for coverage, the multiplicity of edges is counted
- for performance, the result is -1 (total number of possible edges is not defined)

References
----------
.. [1] Santo Fortunato.
"Community Detection in Graphs".
*Physical Reports*, Volume 486, Issue 3--5 pp. 75--174
<https://arxiv.org/abs/0906.0612>
"""

node_community = {}
for i, community in enumerate(partition):
for node in community:
node_community[node] = i

# performance is not defined for multigraphs
if not G.is_multigraph():
# Iterate over the communities, quadratic, to calculate possible_inter_community_edges
possible_inter_community_edges = sum(
len(p1) * len(p2) for p1, p2 in combinations(partition, 2)
)

if G.is_directed():
possible_inter_community_edges *= 2
else:
possible_inter_community_edges = 0

# Compute the number of edges in the complete graph -- n nodes,
# directed or undirected, depending on G
n = len(G)
total_pairs = n * (n - 1)
if not G.is_directed():
total_pairs //= 2

intra_community_edges = 0
inter_community_non_edges = possible_inter_community_edges

# Iterate over the links to count intra_community_edges and inter_community_non_edges
for e in G.edges():
if node_community[e[0]] == node_community[e[1]]:
intra_community_edges += 1
else:
inter_community_non_edges -= 1

coverage = intra_community_edges / len(G.edges)

if G.is_multigraph():
performance = -1.0
else:
performance = (intra_community_edges + inter_community_non_edges) / total_pairs

return coverage, performance