Package networkx :: Module cluster
[hide private]
[frames] | no frames]

Source Code for Module networkx.cluster

  1  """ 
  2  Compute clustering coefficients and transitivity of graphs. 
  3   
  4  Clustering coefficient   
  5    For each node find the fraction of possible triangles that are triangles, 
  6    c_i = triangles_i / (k_i*(k_i-1)/2) 
  7    where k_i is the degree of node i.        
  8   
  9    A coefficient for the whole graph is the average C = avg(c_i) 
 10   
 11  Transitivity  
 12    Find the fraction of all possible triangles which are in fact triangles. 
 13    Possible triangles are identified by the number of "triads" (two edges 
 14    with a shared vertex) 
 15   
 16    T = 3*triangles/triads 
 17   
 18  """ 
 19  __author__ = """Aric Hagberg (hagberg@lanl.gov)\nPieter Swart (swart@lanl.gov)\nDan Schult (dschult@colgate.edu)""" 
 20  __date__ = "$Date: 2005-06-14 12:48:10 -0600 (Tue, 14 Jun 2005) $" 
 21  __credits__ = """""" 
 22  __revision__ = "$Revision: 1012 $" 
 23  #    Copyright (C) 2004,2005 by  
 24  #    Aric Hagberg <hagberg@lanl.gov> 
 25  #    Dan Schult <dschult@colgate.edu> 
 26  #    Pieter Swart <swart@lanl.gov> 
 27  #    Distributed under the terms of the GNU Lesser General Public License 
 28  #    http://www.gnu.org/copyleft/lesser.html 
 29   
30 -def triangles(G,nbunch=None,with_labels=False):
31 """ Return number of triangles for nbunch of nodes. 32 If nbunch is None, then return triangles for every node. 33 If with_labels is True, return a dict keyed by node. 34 35 Note: Each triangle is counted three times: once at each vertex. 36 """ 37 bunch=G.prepare_nbunch(nbunch) 38 39 G_has_edge=G.has_edge # cache functions 40 G_neighbors=G.neighbors 41 42 tri={} 43 for v in bunch: 44 v_nbrs=G_neighbors(v) 45 triangles= [ u for u in v_nbrs for w in v_nbrs if G_has_edge(u,w) ] 46 ntriangles=len(triangles) 47 tri[v]=ntriangles/2 48 49 if with_labels: 50 return tri 51 elif nbunch in G: 52 return tri.values()[0] # return single value 53 return tri.values()
54
55 -def average_clustering(G):
56 """ Average clustering coefficient for a graph. 57 58 Note: this is a space saving routine; It might be faster 59 to use clustering to get a list and then take average. 60 """ 61 order=G.order() 62 s=sum(clustering(G)) 63 return s/float(order)
64
65 -def clustering(G,nbunch=None,with_labels=False,weights=False):
66 """ 67 Clustering coefficient for each node in nbunch. 68 69 If with_labels is True, return a dict keyed by node. 70 71 If both with_labels and weights are True, return both 72 a clustering coefficient dict keyed by node and a 73 dict of weights based on degree. The weights are 74 the fraction of connected triples in the graph which 75 include the keyed node. Ths is useful in moving from 76 transitivity for clustering coefficient and back. 77 """ 78 bunch=G.prepare_nbunch(nbunch) 79 80 G_has_edge=G.has_edge # cache functions 81 G_neighbors=G.neighbors 82 83 clusterc={} 84 for v in bunch: 85 v_nbrs=G_neighbors(v) 86 deg=len(v_nbrs) 87 if deg <= 1: # isolated vertex or single pair gets cc 0 88 clusterc[v]=0.0 89 else: 90 triangles= [ u for u in v_nbrs for w in v_nbrs if G_has_edge(u,w) ] 91 ntriangles=len(triangles) # actually twice number of triangles 92 max_n=deg*(deg-1) # twice the number of possible triangles 93 clusterc[v]=float(ntriangles)/float(max_n) 94 95 if with_labels: 96 if weights: 97 degree=G.degree(with_labels=True) 98 # scale by one over twice the number of triples 99 scale=1./sum([ deg*(deg-1) for deg in degree.itervalues() ]) 100 weight={} 101 for v in clusterc: # only get those nodes in nbunch 102 deg=degree[v] 103 weight[v]=deg*(deg-1)*scale # also twice, but cancels in scaling 104 return clusterc,weight 105 else: 106 return clusterc 107 elif nbunch in G: 108 return clusterc.values()[0] # return single value 109 return clusterc.values()
110
111 -def transitivity(G):
112 """ Transitivity (fraction of transitive triangles) for a graph""" 113 G_has_edge=G.has_edge # cache function 114 G_neighbors=G.neighbors 115 116 triangles=0 # 6 times number of triangles 117 contri=0 # 2 times number of connected triples 118 for v in G.nodes_iter(): 119 v_nbrs=G_neighbors(v) 120 deg=len(v_nbrs) 121 contri += deg*(deg-1) 122 triangles += len([ u for u in v_nbrs for w in v_nbrs if G_has_edge(u,w) ]) 123 t=float(triangles) 124 if t==0: # we had no triangles or possible triangles 125 return t 126 else: 127 return t/float(contri)
128 129
130 -def _test_suite():
131 import doctest 132 suite = doctest.DocFileSuite('tests/cluster.txt',package='networkx') 133 return suite
134 135 if __name__ == "__main__": 136 import os 137 import sys 138 import unittest 139 if sys.version_info[:2] < (2, 4): 140 print "Python version 2.4 or later required for tests (%d.%d detected)." % sys.version_info[:2] 141 sys.exit(-1) 142 # directory of networkx package (relative to this) 143 nxbase=sys.path[0]+os.sep+os.pardir 144 sys.path.insert(0,nxbase) # prepend to search path 145 unittest.TextTestRunner().run(_test_suite()) 146