Warning

This documents an unmaintained version of NetworkX. Please upgrade to a maintained version and see the current NetworkX documentation.

# Source code for networkx.algorithms.cluster

# -*- coding: utf-8 -*-
"""Algorithms to characterize the number of triangles in a graph."""
from itertools import combinations
import networkx as nx
from networkx import NetworkXError
__author__ = """\n""".join(['Aric Hagberg <aric.hagberg@gmail.com>',
'Dan Schult (dschult@colgate.edu)',
'Pieter Swart (swart@lanl.gov)',
'Jordi Torrents <jtorrents@milnou.net>'])
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
__all__= ['triangles', 'average_clustering', 'clustering', 'transitivity',
'square_clustering']

[docs]def triangles(G, nodes=None):
"""Compute the number of triangles.

Finds the number of triangles that include a node as one vertex.

Parameters
----------
G : graph
A networkx graph
nodes : container of nodes, optional (default= all nodes in G)
Compute triangles for nodes in this container.

Returns
-------
out : dictionary
Number of triangles keyed by node label.

Examples
--------
>>> G=nx.complete_graph(5)
>>> print(nx.triangles(G,0))
6
>>> print(nx.triangles(G))
{0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
>>> print(list(nx.triangles(G,(0,1)).values()))
[6, 6]

Notes
-----
When computing triangles for the entire graph each triangle is counted
three times, once at each node.  Self loops are ignored.

"""
if G.is_directed():
raise NetworkXError("triangles() is not defined for directed graphs.")
if nodes in G:
# return single value
return next(_triangles_and_degree_iter(G,nodes))[2] // 2
return dict( (v,t // 2) for v,d,t in _triangles_and_degree_iter(G,nodes))

def _triangles_and_degree_iter(G,nodes=None):
""" Return an iterator of (node, degree, triangles).

This double counts triangles so you may want to divide by 2.
See degree() and triangles() for definitions and details.

"""
if G.is_multigraph():
raise NetworkXError("Not defined for multigraphs.")

if nodes is None:
else:
nodes_nbrs= ( (n,G[n]) for n in G.nbunch_iter(nodes) )

for v,v_nbrs in nodes_nbrs:
vs=set(v_nbrs)-set([v])
ntriangles=0
for w in vs:
ws=set(G[w])-set([w])
ntriangles+=len(vs.intersection(ws))
yield (v,len(vs),ntriangles)

def _weighted_triangles_and_degree_iter(G, nodes=None, weight='weight'):
""" Return an iterator of (node, degree, weighted_triangles).

Used for weighted clustering.

"""
if G.is_multigraph():
raise NetworkXError("Not defined for multigraphs.")

if weight is None or G.edges()==[]:
max_weight=1.0
else:
max_weight=float(max(d.get(weight,1.0)
for u,v,d in G.edges(data=True)))
if nodes is None:
else:
nodes_nbrs= ( (n,G[n]) for n in G.nbunch_iter(nodes) )

for i,nbrs in nodes_nbrs:
inbrs=set(nbrs)-set([i])
weighted_triangles=0.0
seen=set()
for j in inbrs:
wij=G[i][j].get(weight,1.0)/max_weight
jnbrs=set(G[j])-seen # this keeps from double counting
for k in inbrs&jnbrs:
wjk=G[j][k].get(weight,1.0)/max_weight
wki=G[i][k].get(weight,1.0)/max_weight
weighted_triangles+=(wij*wjk*wki)**(1.0/3.0)
yield (i,len(inbrs),weighted_triangles*2)

[docs]def average_clustering(G, nodes=None, weight=None, count_zeros=True):
r"""Compute the average clustering coefficient for the graph G.

The clustering coefficient for the graph is the average,

.. math::

C = \frac{1}{n}\sum_{v \in G} c_v,

where n is the number of nodes in G.

Parameters
----------
G : graph

nodes : container of nodes, optional (default=all nodes in G)
Compute average clustering for nodes in this container.

weight : string or None, optional (default=None)
The edge attribute that holds the numerical value used as a weight.
If None, then each edge has weight 1.

count_zeros : bool (default=False)
If False include only the nodes with nonzero clustering in the average.

Returns
-------
avg : float
Average clustering

Examples
--------
>>> G=nx.complete_graph(5)
>>> print(nx.average_clustering(G))
1.0

Notes
-----
This is a space saving routine; it might be faster
to use the clustering function to get a list and then take the average.

Self loops are ignored.

References
----------
.. [1] Generalizations of the clustering coefficient to weighted
complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
http://jponnela.com/web_documents/a9.pdf
.. [2] Marcus Kaiser,  Mean clustering coefficients: the role of isolated
nodes and leafs on clustering measures for small-world networks.
http://arxiv.org/abs/0802.2512
"""
c=clustering(G,nodes,weight=weight).values()
if not count_zeros:
c = [v for v in c if v > 0]
return sum(c)/float(len(c))

[docs]def clustering(G, nodes=None, weight=None):
r"""Compute the clustering coefficient for nodes.

For unweighted graphs, the clustering of a node u
is the fraction of possible triangles through that node that exist,

.. math::

c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},

where T(u) is the number of triangles through node u and
deg(u) is the degree of u.

For weighted graphs, the clustering is defined
as the geometric average of the subgraph edge weights [1]_,

.. math::

c_u = \frac{1}{deg(u)(deg(u)-1))}
\sum_{uv} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.

The edge weights \hat{w}_{uv} are normalized by the maximum weight in the
network \hat{w}_{uv} = w_{uv}/\max(w).

The value of c_u is assigned to 0 if deg(u) < 2.

Parameters
----------
G : graph

nodes : container of nodes, optional (default=all nodes in G)
Compute clustering for nodes in this container.

weight : string or None, optional (default=None)
The edge attribute that holds the numerical value used as a weight.
If None, then each edge has weight 1.

Returns
-------
out : float, or dictionary
Clustering coefficient at specified nodes

Examples
--------
>>> G=nx.complete_graph(5)
>>> print(nx.clustering(G,0))
1.0
>>> print(nx.clustering(G))
{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}

Notes
-----
Self loops are ignored.

References
----------
.. [1] Generalizations of the clustering coefficient to weighted
complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
http://jponnela.com/web_documents/a9.pdf
"""
if G.is_directed():
raise NetworkXError('Clustering algorithms are not defined ',
'for directed graphs.')
if weight is not None:
td_iter=_weighted_triangles_and_degree_iter(G,nodes,weight)
else:
td_iter=_triangles_and_degree_iter(G,nodes)

clusterc={}

for v,d,t in td_iter:
if t==0:
clusterc[v]=0.0
else:
clusterc[v]=t/float(d*(d-1))

if nodes in G:
return list(clusterc.values())[0] # return single value
return clusterc

[docs]def transitivity(G):
r"""Compute graph transitivity, the fraction of all possible triangles
present in G.

Possible triangles are identified by the number of "triads"
(two edges with a shared vertex).

The transitivity is

.. math::

Parameters
----------
G : graph

Returns
-------
out : float
Transitivity

Examples
--------
>>> G = nx.complete_graph(5)
>>> print(nx.transitivity(G))
1.0
"""
triangles=0 # 6 times number of triangles
contri=0  # 2 times number of connected triples
for v,d,t in _triangles_and_degree_iter(G):
contri += d*(d-1)
triangles += t
if triangles==0: # we had no triangles or possible triangles
return 0.0
else:
return triangles/float(contri)

[docs]def square_clustering(G, nodes=None):
r""" Compute the squares clustering coefficient for nodes.

For each node return the fraction of possible squares that exist at
the node [1]_

.. math::
C_4(v) = \frac{ \sum_{u=1}^{k_v}
\sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v}
\sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]},

where q_v(u,w) are the number of common neighbors of u and w
other than v (ie squares), and
a_v(u,w) = (k_u - (1+q_v(u,w)+\theta_{uv}))(k_w - (1+q_v(u,w)+\theta_{uw})),
where \theta_{uw} = 1 if u and w are connected and 0 otherwise.

Parameters
----------
G : graph

nodes : container of nodes, optional (default=all nodes in G)
Compute clustering for nodes in this container.

Returns
-------
c4 : dictionary
A dictionary keyed by node with the square clustering coefficient value.

Examples
--------
>>> G=nx.complete_graph(5)
>>> print(nx.square_clustering(G,0))
1.0
>>> print(nx.square_clustering(G))
{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}

Notes
-----
While C_3(v) (triangle clustering) gives the probability that
two neighbors of node v are connected with each other, C_4(v) is
the probability that two neighbors of node v share a common
neighbor different from v. This algorithm can be applied to both
bipartite and unipartite networks.

References
----------
.. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005
Cycles and clustering in bipartite networks.
Physical Review E (72) 056127.
"""
if nodes is None:
node_iter = G
else:
node_iter =  G.nbunch_iter(nodes)
clustering = {}
for v in node_iter:
clustering[v] = 0.0
potential=0
for u,w in combinations(G[v], 2):
squares = len((set(G[u]) & set(G[w])) - set([v]))
clustering[v] += squares
degm = squares + 1.0
if w in G[u]:
degm += 1
potential += (len(G[u]) - degm) * (len(G[w]) - degm) + squares
if potential > 0:
clustering[v] /= potential
if nodes in G:
return list(clustering.values())[0] # return single value
return clustering