1 """
2 Generate graphs with a given degree sequence or expected degree sequence.
3
4 """
5
6
7
8
9
10
11
12 __author__ = """Aric Hagberg (hagberg@lanl.gov)\nPieter Swart (swart@lanl.gov)\nDan Schult (dschult@colgate.edu)"""
13
14 import random
15 import networkx
16 import networkx.utils
17 from networkx.generators.classic import empty_graph
18 import heapq
19
20
21
22
24 """Return a random pseudograph with the given degree sequence.
25
26 - `deg_sequence`: degree sequence, a list of integers with each entry
27 corresponding to the degree of a node (need not be
28 sorted). A non-graphical degree sequence (i.e. one
29 not realizable by some simple graph) will raise an
30 Exception.
31 - `seed`: seed for random number generator (default=None)
32
33
34 >>> z=create_degree_sequence(100,powerlaw_sequence)
35 >>> G=configuration_model(z)
36
37 The pseudograph G is a networkx.XGraph that allows multiple (parallel) edges
38 between nodes and edges that connect nodes to themseves (self loops).
39
40 To remove self-loops:
41
42 >>> G.ban_selfloops()
43
44 To remove parallel edges:
45
46 >>> G.ban_multiedges()
47
48 Steps:
49
50 - Check if deg_sequence is a valid degree sequence.
51 - Create N nodes with stubs for attaching edges
52 - Randomly select two available stubs and connect them with an edge.
53
54 As described by Newman [newman-2003-structure].
55
56 Nodes are labeled 1,.., len(deg_sequence),
57 corresponding to their position in deg_sequence.
58
59 This process can lead to duplicate edges and loops, and therefore
60 returns a pseudograph type. You can remove the self-loops and
61 parallel edges (see above) with the likely result of
62 not getting the exat degree sequence specified.
63 This "finite-size effect" decreases as the size of the graph increases.
64
65 References:
66
67 [newman-2003-structure] M.E.J. Newman, "The structure and function
68 of complex networks", SIAM REVIEW 45-2, pp 167-256, 2003.
69
70 """
71 if not sum(deg_sequence)%2 ==0:
72 raise networkx.NetworkXError, 'Invalid degree sequence'
73
74 if not seed is None:
75 random.seed(seed)
76
77
78 N=len(deg_sequence)
79
80
81
82 G=networkx.empty_graph(N,create_using=networkx.XGraph(multiedges=True, \
83 selfloops=True))
84
85 if N==0 or max(deg_sequence)==0:
86 return G
87
88
89
90
91
92 stublist=[]
93 for n in G:
94 for i in range(deg_sequence[n-1]):
95 stublist.append(n)
96
97
98 random.shuffle(stublist)
99 while stublist:
100 n1 = stublist.pop()
101 n2 = stublist.pop()
102 G.add_edge(n1,n2)
103
104 G.name="configuration_model %d nodes %d edges"%(G.order(),G.size())
105 return G
106
107
109 """Return a random graph G(w) with expected degrees given by w.
110
111 :Parameters:
112 - `w`: a list of expected degrees
113 - `seed`: seed for random number generator (default=None)
114
115 >>> z=[10 for i in range(100)]
116 >>> G=expected_degree_graph(z)
117
118 To remove self-loops:
119
120 >>> G.ban_selfloops()
121
122 Reference::
123
124 @Article{connected-components-2002,
125 author = {Fan Chung and L. Lu},
126 title = {Connected components in random graphs
127 with given expected degree sequences},
128 journal = {Ann. Combinatorics},
129 year = {2002},
130 volume = {6},
131 pages = {125-145},
132 }
133
134
135 """
136
137 n = len(w)
138
139 G=networkx.empty_graph(n,create_using=networkx.XGraph(selfloops=True))
140 G.name="random_expected_degree_graph"
141
142 if n==0 or max(w)==0:
143 return G
144
145 d = sum(w)
146 rho = 1.0 / float(d)
147 for i in xrange(n):
148 if (w[i])**2 > d:
149 raise networkx.NetworkXError,\
150 "NetworkXError w[i]**2 must be <= sum(w)\
151 for all i, w[%d] = %f, sum(w) = %f" % (i,w[i],d)
152
153 if seed is not None:
154 random.seed(seed)
155
156 for u in xrange(n):
157 for v in xrange(u,n):
158 if random.random() < w[u]*w[v]*rho:
159 G.add_edge(u,v)
160 return G
161
162
164 """Return a simple graph with given degree sequence, constructed using the
165 Havel-Hakimi algorithm.
166
167 - `deg_sequence`: degree sequence, a list of integers with each entry
168 corresponding to the degree of a node (need not be sorted).
169 A non-graphical degree sequence (not sorted).
170 A non-graphical degree sequence (i.e. one
171 not realizable by some simple graph) raises an Exception.
172
173 The Havel-Hakimi algorithm constructs a simple graph by
174 successively connecting the node of highest degree to other nodes
175 of highest degree, resorting remaining nodes by degree, and
176 repeating the process. The resulting graph has a high
177 degree-associativity. Nodes are labeled 1,.., len(deg_sequence),
178 corresponding to their position in deg_sequence.
179
180 See Theorem 1.4 in [chartrand-graphs-1996].
181 This algorithm is also used in the function is_valid_degree_sequence.
182
183 References:
184
185 [chartrand-graphs-1996] G. Chartrand and L. Lesniak, "Graphs and Digraphs",
186 Chapman and Hall/CRC, 1996.
187
188 """
189 if not is_valid_degree_sequence(deg_sequence):
190 raise networkx.NetworkXError, 'Invalid degree sequence'
191
192
193 N=len(deg_sequence)
194 G=networkx.empty_graph(N)
195
196 if N==0 or max(deg_sequence)==0:
197 return G
198
199
200 stublist=[ [deg_sequence[n],n] for n in G]
201
202 while stublist:
203 stublist.sort()
204 if stublist[0][0]<0:
205 return False
206
207 (freestubs,source) = stublist.pop()
208 if freestubs==0: break
209 if freestubs > len(stublist):
210 return False
211
212
213 for stubtarget in stublist[-freestubs:]:
214 G.add_edge(source, stubtarget[1])
215 stubtarget[0] -= 1
216
217 G.name="havel_hakimi_graph %d nodes %d edges"%(G.order(),G.size())
218 return G
219
221 """
222 Make a tree for the given degree sequence.
223
224 A tree has #nodes-#edges=1 so
225 the degree sequence must have
226 len(deg_sequence)-sum(deg_sequence)/2=1
227 """
228
229 if not len(deg_sequence)-sum(deg_sequence)/2.0 == 1.0:
230 raise networkx.NetworkXError,"Degree sequence invalid"
231
232 G=empty_graph(0)
233
234 if len(deg_sequence)==1:
235 return G
236 deg=[s for s in deg_sequence if s>1]
237 deg.sort(reverse=True)
238
239
240 n=len(deg)+2
241 G=networkx.path_graph(n)
242 last=n
243
244
245 for source in range(1,n-1):
246 nedges=deg.pop()-2
247 for target in range(last,last+nedges):
248 G.add_edge(source, target)
249 last+=nedges
250
251
252 if len(G.degree())>len(deg_sequence):
253 G.delete_node(0)
254 return G
255
256
258 """Return True if deg_sequence is a valid sequence of integer degrees
259 equal to the degree sequence of some simple graph.
260
261 - `deg_sequence`: degree sequence, a list of integers with each entry
262 corresponding to the degree of a node (need not be sorted).
263 A non-graphical degree sequence (i.e. one not realizable by some
264 simple graph) will raise an exception.
265
266 See Theorem 1.4 in [chartrand-graphs-1996]. This algorithm is also used
267 in havel_hakimi_graph()
268
269 References:
270
271 [chartrand-graphs-1996] G. Chartrand and L. Lesniak, "Graphs and Digraphs",
272 Chapman and Hall/CRC, 1996.
273
274 """
275
276 if deg_sequence==[]:
277 return True
278 if not networkx.utils.is_list_of_ints(deg_sequence):
279 return False
280 if min(deg_sequence)<0:
281 return False
282 if sum(deg_sequence)%2:
283 return False
284
285
286
287
288 s=deg_sequence[:]
289 while s:
290 s.sort()
291 if s[0]<0:
292 return False
293
294 d=s.pop()
295 if d==0: return True
296
297
298 if d>len(s): return False
299
300
301 s.reverse()
302 for i in range(d):
303 s[i]-=1
304
305
306 return False
307
308
310 """ Attempt to create a valid degree sequence of length n using
311 specified function sfunction(n,**kwds).
312
313 - `n`: length of degree sequence = number of nodes
314 - `sfunction`: a function, called as "sfunction(n,**kwds)",
315 that returns a list of n real or integer values.
316 - `max_tries`: max number of attempts at creating valid degree
317 sequence.
318
319 Repeatedly create a degree sequence by calling sfunction(n,**kwds)
320 until achieving a valid degree sequence. If unsuccessful after
321 max_tries attempts, raise an exception.
322
323 For examples of sfunctions that return sequences of random numbers,
324 see networkx.Utils.
325
326 >>> from networkx.utils import *
327 >>> seq=create_degree_sequence(10,uniform_sequence)
328
329 """
330 tries=0
331 max_deg=n
332 while tries < max_tries:
333 trialseq=sfunction(n,**kwds)
334
335 seq=[min(max_deg, max( int(round(s)),0 )) for s in trialseq]
336
337 if is_valid_degree_sequence(seq):
338 return seq
339 tries+=1
340 raise networkx.NetworkXError, \
341 "Exceeded max (%d) attempts at a valid sequence."%max_tries
342
344 """Attempt nswap double-edge swaps on the graph G.
345
346 Return count of successful swaps.
347 The graph G is modified in place.
348 A double-edge swap removes two randomly choseen edges u-v and x-y
349 and creates the new edges u-x and v-y::
350
351 u--v u v
352 becomes | |
353 x--y x y
354
355
356 If either the edge u-x or v-y already exist no swap is performed so
357 the actual count of swapped edges is always <= nswap
358
359 Does not enforce any connectivity constraints.
360 """
361
362
363
364 n=0
365 swapcount=0
366 deg=G.degree(with_labels=True)
367 dk=deg.keys()
368 cdf=networkx.utils.cumulative_distribution(deg.values())
369 if len(cdf)<4:
370 raise networkx.NetworkXError, \
371 "Graph has less than four nodes."
372 while n < nswap:
373
374
375
376 (ui,xi)=networkx.utils.discrete_sequence(2,cdistribution=cdf)
377 if ui==xi: continue
378 u=dk[ui]
379 x=dk[xi]
380 v=random.choice(G[u])
381 y=random.choice(G[x])
382 if v==y: continue
383 if (not G.has_edge(u,x)) and (not G.has_edge(v,y)):
384 G.add_edge(u,x)
385 G.add_edge(v,y)
386 G.delete_edge(u,v)
387 G.delete_edge(x,y)
388 swapcount+=1
389 n+=1
390 return swapcount
391
393 """Attempt nswap double-edge swaps on the graph G.
394
395 Returns count of successful swaps. Enforces connectivity.
396 The graph G is modified in place.
397
398 A double-edge swap removes two randomly choseen edges u-v and x-y
399 and creates the new edges u-x and v-y::
400
401 u--v u v
402 becomes | |
403 x--y x y
404
405
406 If either the edge u-x or v-y already exist no swap is performed so
407 the actual count of swapped edges is always <= nswap
408
409 The initial graph G must be connected and the resulting graph is connected.
410
411 Reference::
412
413 @misc{gkantsidis-03-markov,
414 author = "C. Gkantsidis and M. Mihail and E. Zegura",
415 title = "The Markov chain simulation method for generating connected
416 power law random graphs",
417 year = "2003",
418 url = "http://citeseer.ist.psu.edu/gkantsidis03markov.html"
419 }
420
421
422 """
423 import math
424 if not networkx.is_connected(G):
425 raise networkx.NetworkXException("Graph not connected")
426
427 n=0
428 swapcount=0
429 deg=G.degree(with_labels=True)
430 dk=deg.keys()
431 ideg=G.degree()
432 cdf=networkx.utils.cumulative_distribution(G.degree())
433 if len(cdf)<4:
434 raise networkx.NetworkXError, \
435 "Graph has less than four nodes."
436 window=1
437 while n < nswap:
438 wcount=0
439 swapped=[]
440 while wcount < window and n < nswap:
441
442
443 (ui,xi)=networkx.utils.discrete_sequence(2,cdistribution=cdf)
444 if ui==xi: continue
445 u=dk[ui]
446 x=dk[xi]
447 v=random.choice(G[u])
448 y=random.choice(G[x])
449 if v==y: continue
450 if (not G.has_edge(u,x)) and (not G.has_edge(v,y)):
451 G.delete_edge(u,v)
452 G.delete_edge(x,y)
453 G.add_edge(u,x)
454 G.add_edge(v,y)
455 swapped.append((u,v,x,y))
456 swapcount+=1
457 n+=1
458 wcount+=1
459 if networkx.is_connected(G):
460 window+=1
461 else:
462 while swapped:
463 (u,v,x,y)=swapped.pop()
464 G.add_edge(u,v)
465 G.add_edge(x,y)
466 G.delete_edge(u,x)
467 G.delete_edge(v,y)
468 swapcount-=1
469 window = int(math.ceil(float(window)/2))
470 assert G.degree() == ideg
471 return swapcount
472
473
474
476 """Generates a graph based with a given degree sequence and maximizing
477 the s-metric. Experimental implementation.
478
479 Maximum s-metrix means that high degree nodes are connected to high
480 degree nodes.
481
482 - `degree_seq`: degree sequence, a list of integers with each entry
483 corresponding to the degree of a node.
484 A non-graphical degree sequence raises an Exception.
485
486 Reference::
487
488 @unpublished{li-2005,
489 author = {Lun Li and David Alderson and Reiko Tanaka
490 and John C. Doyle and Walter Willinger},
491 title = {Towards a Theory of Scale-Free Graphs:
492 Definition, Properties, and Implications (Extended Version)},
493 url = {http://arxiv.org/abs/cond-mat/0501169},
494 year = {2005}
495 }
496
497 The algorithm::
498
499 STEP 0 - Initialization
500 A = {0}
501 B = {1, 2, 3, ..., n}
502 O = {(i; j), ..., (k, l),...} where i < j, i <= k < l and
503 d_i * d_j >= d_k *d_l
504 wA = d_1
505 dB = sum(degrees)
506
507 STEP 1 - Link selection
508 (a) If |O| = 0 TERMINATE. Return graph A.
509 (b) Select element(s) (i, j) in O having the largest d_i * d_j , if for
510 any i or j either w_i = 0 or w_j = 0 delete (i, j) from O
511 (c) If there are no elements selected go to (a).
512 (d) Select the link (i, j) having the largest value w_i (where for each
513 (i, j) w_i is the smaller of w_i and w_j ), and proceed to STEP 2.
514
515 STEP 2 - Link addition
516 Type 1: i in A and j in B.
517 Add j to the graph A and remove it from the set B add a link
518 (i, j) to the graph A. Update variables:
519 wA = wA + d_j -2 and dB = dB - d_j
520 Decrement w_i and w_j with one. Delete (i, j) from O
521 Type 2: i and j in A.
522 Check Tree Condition: If dB = 2 * |B| - wA.
523 Delete (i, j) from O, continue to STEP 3
524 Check Disconnected Cluster Condition: If wA = 2.
525 Delete (i, j) from O, continue to STEP 3
526 Add the link (i, j) to the graph A
527 Decrement w_i and w_j with one, and wA = wA -2
528 STEP 3
529 Go to STEP 1
530
531
532 The article states that the algorithm will result in a maximal s-metric.
533 This implementation can not guarantee such maximality. I may have
534 misunderstood the algorithm, but I can not see how it can be anything
535 but a heuristic. Please contact me at sundsdal@gmail.com if you can
536 provide python code that can guarantee maximality.
537 Several optimizations are included in this code and it may be hard to read.
538 Commented code to come.
539 """
540
541 if not is_valid_degree_sequence(degree_seq):
542 raise networkx.NetworkXError, 'Invalid degree sequence'
543 degree_seq.sort()
544 degree_seq.reverse()
545 degrees_left = degree_seq[:]
546 A_graph = networkx.Graph()
547 A_graph.add_node(0)
548 a_list = [False]*len(degree_seq)
549 b_set = set(range(1,len(degree_seq)))
550 a_open = set([0])
551 O = []
552 for j in b_set:
553 heapq.heappush(O, (-degree_seq[0]*degree_seq[j], (0,j)))
554 wa = degrees_left[0]
555 db = sum(degree_seq) - degree_seq[0]
556 a_list[0] = True
557 bsize = len(degree_seq) -1
558 selected = []
559 weight = 0
560 while O or selected:
561 if len(selected) <1 :
562 firstrun = True
563 while O:
564 (newweight, (i,j)) = heapq.heappop(O)
565 if degrees_left[i] < 1 or degrees_left[j] < 1 :
566 continue
567 if firstrun:
568 firstrun = False
569 weight = newweight
570 if not newweight == weight:
571 break
572 heapq.heappush(selected, [-degrees_left[i], \
573 -degrees_left[j], (i,j)])
574 if not weight == newweight:
575 heapq.heappush(O,(newweight, (i,j)))
576 weight *= -1
577 if len(selected) < 1:
578 break
579
580 [w1, w2, (i,j)] = heapq.heappop(selected)
581 if degrees_left[i] < 1 or degrees_left[j] < 1 :
582 continue
583 if a_list[i] and j in b_set:
584
585 a_list[j] = True
586 b_set.remove(j)
587 A_graph.add_node(j)
588 A_graph.add_edge(i, j)
589 degrees_left[i] -= 1
590 degrees_left[j] -= 1
591 wa += degree_seq[j] - 2
592 db -= degree_seq[j]
593 bsize -= 1
594 newweight = weight
595 if not degrees_left[j] == 0:
596 a_open.add(j)
597 for k in b_set:
598 if A_graph.has_edge(j, k): continue
599 w = degree_seq[j]*degree_seq[k]
600 if w > newweight:
601 newweight = w
602 if weight == w and not newweight > weight:
603 heapq.heappush(selected, [-degrees_left[j], \
604 -degrees_left[k], (j,k)])
605 else:
606 heapq.heappush(O, (-w, (j,k)))
607 if not weight == newweight:
608 while selected:
609 [w1,w2,(i,j)] = heapq.heappop(selected)
610 if degrees_left[i]*degrees_left[j] > 0:
611 heapq.heappush(O, [-degree_seq[i]*degree_seq[j],(i,j)])
612 if degrees_left[i] == 0:
613 a_open.discard(i)
614
615 else:
616
617 if db == (2*bsize - wa):
618
619
620 continue
621 elif db < 2*bsize -wa:
622 raise networkx.NetworkXError, "THIS SHOULD NOT HAPPEN! - not graphable"
623 continue
624 elif wa == 2 and bsize > 0:
625
626
627 continue
628 elif wa == db - (bsize)*(bsize-1):
629
630 continue
631 A_graph.add_edge(i, j)
632 degrees_left[i] -= 1
633 degrees_left[j] -= 1
634 if degrees_left[i] < 1:
635 a_open.discard(i)
636 if degrees_left[j] < 1:
637 a_open.discard(j)
638 wa -= 2
639 if not degrees_left[i] < 0 and not degrees_left[j] < 0:
640 selected2 = (selected)
641 selected = []
642 while selected2:
643 [w1,w1, (i,j)] = heapq.heappop(selected2)
644 if degrees_left[i]*degrees_left[j] > 0:
645 heapq.heappush(selected, [-degrees_left[i], \
646 -degrees_left[j], (i,j)])
647 return A_graph
648
666
668 """
669 Return the "s-Metric" of graph G:
670 the sum of the product deg(u)*deg(v) for every edge u-v in G
671
672 Reference::
673
674 @unpublished{li-2005,
675 author = {Lun Li and David Alderson and
676 John C. Doyle and Walter Willinger},
677 title = {Towards a Theory of Scale-Free Graphs:
678 Definition, Properties, and Implications (Extended Version)},
679 url = {http://arxiv.org/abs/cond-mat/0501169},
680 year = {2005}
681 }
682
683 """
684
685 return sum([G.degree(u)*G.degree(v) for (u,v) in G.edges_iter()])
686
688 import doctest
689 suite = doctest.DocFileSuite('tests/generators/degree_seq.txt',
690 package='networkx')
691 return suite
692
693
694 if __name__ == "__main__":
695 import os
696 import sys
697 import unittest
698 if sys.version_info[:2] < (2, 4):
699 print "Python version 2.4 or later required (%d.%d detected)." \
700 % sys.version_info[:2]
701 sys.exit(-1)
702
703 nxbase=sys.path[0]+os.sep+os.pardir
704 sys.path.insert(0,nxbase)
705 unittest.TextTestRunner().run(_test_suite())
706