networkx.io

"""
Read and write NetworkX graphs.

Note that NetworkX graphs can contain any hashable Python object as
node (not just integers and strings).  So writing a NetworkX graph
as a text file may not always be what you want: see write_gpickle
and read_gpickle for that case.

This module provides the following:

Edgelist format:
Useful for connected graphs with or without edge data.

    write_edgelist(G, path)
    G=read_edgelist(path)

Adjacency list with single line per node:
Useful for connected or unconnected graphs without edge data.

    write_adjlist(G, path)
    G=read_adjlist(path)

Adjacency list with multiple lines per node:
Useful for connected or unconnected graphs with or without edge data.

    write_multiline_adjlist(G, path)
    read_multiline_adjlist(path)

Python pickled format:
Useful for graphs with non text representable data.

    write_gpickle(G, path)
    read_gpickle(path)

YAML text format (requires the yaml package, see http://www.yaml.org):

    write_yaml(G, path)
    read_yaml(path)

"""
__author__ = """Aric Hagberg (hagberg@lanl.gov)\nDan Schult (dschult@colgate.edu)"""
__date__ = """"""
__credits__ = """"""
__revision__ = "$$"
#    Copyright (C) 2004-2006 by
#    Aric Hagberg <hagberg@lanl.gov>
#    Dan Schult <dschult@colgate.edu>
#    Pieter Swart <swart@lanl.gov>
#    Distributed under the terms of the GNU Lesser General Public License
#    http://www.gnu.org/copyleft/lesser.html

import cPickle
import codecs
import locale
import string
import sys
import time

from networkx.utils import is_string_like
import networkx

def write_multiline_adjlist(G, path, delimiter=' ', comments='#'):
    """
    Write the graph G in multiline adjacency list format to the file
    or file handle path.

    See read_multiline_adjlist for file format details.

    >>> write_multiline_adjlist(G,"file.adjlist")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    >>> fh=open("file.adjlist","w")
    >>> write_multiline_adjlist(G,fh)

    Filenames ending in .gz or .bz2 will be compressed.

    >>> write_multiline_adjlist(G,"file.adjlist.gz")

    The file will use the default text encoding on your system.
    It is possible to write files in other encodings by opening
    the file with the codecs module.  See doc/examples/unicode.py
    for hints.

    >>> import codecs
    >>> fh=codecs.open("file.adjlist","w",encoding='utf-8') # use utf-8 encoding
    >>> write_multiline_adjlist(G,fh)

    The output starts with three header lines, each prefixed with
    comments: the command line (sys.argv), a GMT time stamp and G.name.
    Each node then gets one line "node<delimiter>degree" followed by
    one line per edge, "neighbor" or "neighbor<delimiter>data".

    """
    def _text(obj):
        # String-like objects are written as they are; everything
        # else is converted with str().
        if is_string_like(obj):
            return obj
        return str(obj)

    fh = _get_fh(path, mode='w')
    try:
        fh.write("%s\n" % (comments + " " + ' '.join(sys.argv)))
        fh.write(comments + " GMT %s\n" % (time.asctime(time.gmtime())))
        fh.write(comments + " %s\n" % (G.name))

        directed = G.is_directed()

        # Nodes whose edges were already written.  For undirected graphs
        # an edge to such a node has been emitted before and is skipped.
        seen = {}
        for s in G.nodes():
            edges = [edge for edge in G.edges_iter(s) if edge[1] not in seen]
            fh.write(_text(s) + delimiter)
            fh.write("%i\n" % (len(edges)))
            for edge in edges:
                t = edge[1]
                if len(edge) == 2:      # Graph or DiGraph
                    d = None
                else:                   # XGraph or XDiGraph
                    d = edge[2]         # Note: could still be None
                if d is None:
                    fh.write(_text(t) + '\n')
                else:
                    fh.write(_text(t) + delimiter)
                    fh.write(_text(d) + "\n")
            if not directed:
                seen[s] = 1
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

125

def read_multiline_adjlist(path, comments="#", delimiter=' ',
                           create_using=None,
                           nodetype=None, edgetype=None):
    """Read graph in multi-line adjacency list format from path.

    >>> G=read_multiline_adjlist("file.adjlist")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    >>> fh=open("file.adjlist")
    >>> G=read_multiline_adjlist(fh)

    Filenames ending in .gz or .bz2 will be uncompressed.

    >>> G=read_multiline_adjlist("file.adjlist.gz")

    nodetype is an optional function to convert node strings to nodetype

    For example

    >>> G=read_multiline_adjlist("file.adjlist", nodetype=int)

    will attempt to convert all nodes to integer type

    Since nodes must be hashable, the function nodetype must return hashable
    types (e.g. int, float, str, frozenset - or tuples of those, etc.)

    edgetype is a function to convert edge data strings to edgetype

    >>> G=read_multiline_adjlist("file.adjlist", edgetype=int)

    create_using is an optional networkx graph type, the default is
    Graph(), a simple undirected graph

    >>> G=read_multiline_adjlist("file.adjlist", create_using=DiGraph())

    The comments character (default='#') at the beginning of a
    line indicates a comment line.

    The entries are separated by delimiter (default=' ').
    If whitespace is significant in node or edge labels you should use
    some other delimiter such as a tab or other symbol.


    Example multiline adjlist file format::

     # source target for Graph or DiGraph
     a 2
     b
     c
     d 1
     e

    or

     # source target for XGraph or XDiGraph with edge data
     a 2
     b edge-ab-data
     c edge-ac-data
     d 1
     e edge-de-data

    Reading the file will use the default text encoding on your system.
    It is possible to read files with other encodings by opening
    the file with the codecs module.  See doc/examples/unicode.py
    for hints.

    >>> import codecs
    >>> fh=codecs.open("file.adjlist", encoding='utf-8') # use utf-8 encoding
    >>> G=read_multiline_adjlist(fh)

    Raises TypeError if create_using cannot be cleared, if a line
    cannot be parsed, if a node or edge-data conversion fails, or if
    the file ends before all announced neighbor lines were read.
    """
    if create_using is None:
        G = networkx.Graph()
    else:
        try:
            G = create_using
            G.clear()
        except Exception:
            raise TypeError("Input graph is not a networkx graph type")

    # is this a XGraph or XDiGraph?
    xgraph = hasattr(G, 'allow_multiedges')

    inp = _get_fh(path)
    try:
        for line in inp:
            # Drop a trailing comment.  find() returns -1 when there is
            # none; slicing with -1 would silently cut the last character
            # of a line that has no trailing newline.
            cut = line.find(comments)
            if cut >= 0:
                line = line[:cut]
            line = line.strip()
            if not line:
                continue
            try:
                (u, deg) = line.split(delimiter)
                deg = int(deg)
            except (ValueError, TypeError):
                raise TypeError(
                    "Failed to read node and degree on line (%s)" % line)
            if nodetype is not None:
                try:
                    u = nodetype(u)
                except Exception:
                    raise TypeError("Failed to convert node (%s) to type %s"
                                    % (u, nodetype))
            G.add_node(u)
            for i in range(deg):
                # The next deg lines each describe one neighbor of u.
                try:
                    line = inp.next().strip()
                except StopIteration:
                    raise TypeError(
                        "Failed to find neighbor for node (%s)" % (u,))
                vlist = line.split(delimiter)
                if len(vlist) == 1:
                    v = vlist[0]
                    d = None
                elif len(vlist) == 2:
                    (v, d) = vlist
                else:
                    raise TypeError("Failed to read line: %s" % vlist)
                if nodetype is not None:
                    try:
                        v = nodetype(v)
                    except Exception:
                        raise TypeError(
                            "Failed to convert node (%s) to type %s"
                            % (v, nodetype))
                if xgraph:
                    if d is not None and edgetype is not None:
                        try:
                            d = edgetype(d)
                        except Exception:
                            raise TypeError(
                                "Failed to convert edge data (%s) to type %s"
                                % (d, edgetype))
                    G.add_edge(u, v, d)
                else:
                    G.add_edge(u, v)
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if inp is not path:
            inp.close()

    return G

262 263

def write_adjlist(G, path, comments="#", delimiter=' '):
    """Write graph G in single-line adjacency-list format to path.

    See read_adjlist for file format details.

    >>> write_adjlist(G, "file.adjlist")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    >>> fh=open("file.adjlist","w")
    >>> write_adjlist(G, fh)

    Filenames ending in .gz or .bz2 will be compressed.

    >>> write_adjlist(G, "file.adjlist.gz")

    The file will use the default text encoding on your system.
    It is possible to write files in other encodings by opening
    the file with the codecs module.  See doc/examples/unicode.py
    for hints.

    >>> import codecs
    >>> fh=codecs.open("file.adjlist","w",encoding='utf-8') # use utf-8 encoding
    >>> write_adjlist(G,fh)

    Does not handle data in XGraph or XDiGraph, use 'write_edgelist'
    or 'write_multiline_adjlist'

    The output starts with three header lines, each prefixed with
    comments: the command line (sys.argv), a GMT time stamp and G.name.
    Each node is then written on its own line followed by its
    neighbors, separated by delimiter.  No delimiter is written at the
    end of a line, so that reading the file back with a non-whitespace
    delimiter does not produce an empty extra node.
    """
    def _text(obj):
        # String-like objects are written as they are; everything
        # else is converted with str().
        if is_string_like(obj):
            return obj
        return str(obj)

    fh = _get_fh(path, mode='w')
    try:
        fh.write("%s\n" % (comments + " " + ' '.join(sys.argv)))
        fh.write(comments + " GMT %s\n" % (time.asctime(time.gmtime())))
        fh.write(comments + " %s\n" % (G.name))

        # Graph classes without a multiedges attribute are treated as
        # having no parallel edges.
        multiedges = getattr(G, 'multiedges', False)

        directed = G.is_directed()

        written = {}    # helper dict used to avoid duplicate edges
        for s in G.nodes():
            fh.write(_text(s))
            for t in G.neighbors(s):
                if not directed:
                    # An undirected edge already written as (t, s) is
                    # not repeated on the line of s.
                    if (t, s) in written:
                        continue
                    written.setdefault((s, t), 1)
                if multiedges:
                    # Repeat the neighbor once per parallel edge.
                    for d in G.get_edge(s, t):
                        fh.write(delimiter + _text(t))
                else:
                    fh.write(delimiter + _text(t))
            fh.write("\n")
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

328 329

def read_adjlist(path, comments="#", delimiter=' ',
                 create_using=None, nodetype=None):
    """Read graph in single line adjacency list format from path.

    >>> G=read_adjlist("file.adjlist")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    >>> fh=open("file.adjlist")
    >>> G=read_adjlist(fh)

    Filenames ending in .gz or .bz2 will be uncompressed.

    >>> G=read_adjlist("file.adjlist.gz")

    nodetype is an optional function to convert node strings to nodetype

    For example

    >>> G=read_adjlist("file.adjlist", nodetype=int)

    will attempt to convert all nodes to integer type

    Since nodes must be hashable, the function nodetype must return hashable
    types (e.g. int, float, str, frozenset - or tuples of those, etc.)

    create_using is an optional networkx graph type, the default is
    Graph(), a simple undirected graph

    >>> G=read_adjlist("file.adjlist", create_using=DiGraph())

    Does not handle edge data: use 'read_edgelist' or 'read_multiline_adjlist'

    The comments character (default='#') at the beginning of a
    line indicates a comment line.

    The entries are separated by delimiter (default=' ').
    If whitespace is significant in node or edge labels you should use
    some other delimiter such as a tab or other symbol.

    Example adjlist file format::

     # source target
     a b c
     d e

    Raises TypeError if create_using cannot be cleared or if a node
    conversion fails.
    """
    if create_using is None:
        G = networkx.Graph()
    else:
        try:
            G = create_using
            G.clear()
        except Exception:
            raise TypeError("Input graph is not a networkx graph type")

    fh = _get_fh(path)
    try:
        for line in fh:
            # Drop a trailing comment.  find() returns -1 when there is
            # none; slicing with -1 would silently cut the last character
            # of a line that has no trailing newline.
            cut = line.find(comments)
            if cut >= 0:
                line = line[:cut]
            line = line.strip()
            if not line:
                continue
            vlist = line.split(delimiter)
            u = vlist.pop(0)
            # convert types
            if nodetype is not None:
                try:
                    u = nodetype(u)
                except Exception:
                    raise TypeError("Failed to convert node (%s) to type %s"
                                    % (u, nodetype))
            G.add_node(u)
            if nodetype is not None:
                try:
                    vlist = [nodetype(v) for v in vlist]
                except Exception:
                    # vlist still holds the unconverted strings here.
                    raise TypeError("Failed to convert nodes (%s) to type %s"
                                    % (','.join(vlist), nodetype))
            for v in vlist:
                G.add_edge(u, v)
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()
    return G

410 411

def write_edgelist(G, path, comments="#", delimiter=' '):
    """Write graph G in edgelist format on file path.

    See read_edgelist for file format details.

    >>> write_edgelist(G, "file.edgelist")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    >>> fh=open("file.edgelist","w")
    >>> write_edgelist(G,fh)

    Filenames ending in .gz or .bz2 will be compressed.

    >>> write_edgelist(G, "file.edgelist.gz")

    The file will use the default text encoding on your system.
    It is possible to write files in other encodings by opening
    the file with the codecs module.  See doc/examples/unicode.py
    for hints.

    >>> import codecs
    >>> fh=codecs.open("file.edgelist","w",encoding='utf-8') # use utf-8 encoding
    >>> write_edgelist(G,fh)

    The output starts with three header lines, each prefixed with
    comments: the command line (sys.argv), a GMT time stamp and G.name.
    Each edge is then written on its own line with its entries
    separated by delimiter; entries that are None are left out.  No
    delimiter is written at the end of a line, so that reading the
    file back with a non-whitespace delimiter does not produce an
    empty extra field.

    """
    def _text(obj):
        # String-like objects are written as they are; everything
        # else is converted with str().
        if is_string_like(obj):
            return obj
        return str(obj)

    fh = _get_fh(path, mode='w')
    try:
        fh.write("%s\n" % (comments + " " + ' '.join(sys.argv)))
        fh.write(comments + " GMT %s\n" % (time.asctime(time.gmtime())))
        fh.write(comments + " %s\n" % (G.name))
        for e in G.edges():
            # handle Graph or XGraph, two- or three-tuple;
            # don't write data for XGraph None
            fields = [_text(n) for n in e if n is not None]
            for pos, field in enumerate(fields):
                if pos:
                    fh.write(delimiter)
                fh.write(field)
            fh.write("\n")
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

453

def read_edgelist(path, comments="#", delimiter=' ',
                  create_using=None, nodetype=None, edgetype=None):
    """Read graph in edgelist format from path.

    >>> G=read_edgelist("file.edgelist")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    >>> fh=open("file.edgelist")
    >>> G=read_edgelist(fh)

    Filenames ending in .gz or .bz2 will be uncompressed.

    >>> G=read_edgelist("file.edgelist.gz")

    nodetype is an optional function to convert node strings to nodetype

    For example

    >>> G=read_edgelist("file.edgelist", nodetype=int)

    will attempt to convert all nodes to integer type

    Since nodes must be hashable, the function nodetype must return hashable
    types (e.g. int, float, str, frozenset - or tuples of those, etc.)

    edgetype is an optional function to convert edge data strings to
    edgetype

    create_using is an optional networkx graph type, the default is
    Graph(), a simple undirected graph

    >>> G=read_edgelist("file.edgelist",create_using=DiGraph())


    The comments character (default='#') at the beginning of a
    line indicates a comment line.

    The entries are separated by delimiter (default=' ').
    If whitespace is significant in node or edge labels you should use
    some other delimiter such as a tab or other symbol.

    Example edgelist file format::

     # source target
     a b
     a c
     d e

    or for an XGraph() with edge data

     # source target data
     a b 1
     a c 3.14159
     d e apple

    Raises TypeError if create_using cannot be cleared, if a line does
    not have two or three entries, or if a node or edge-data
    conversion fails.
    """
    if create_using is None:
        G = networkx.Graph()
    else:
        try:
            G = create_using
            G.clear()
        except Exception:
            raise TypeError("Input graph is not a networkx graph type")

    # is this a XGraph or XDiGraph?
    xgraph = hasattr(G, 'allow_multiedges')

    fh = _get_fh(path)
    try:
        for line in fh:
            # Drop a trailing comment.  find() returns -1 when there is
            # none; slicing with -1 would silently cut the last character
            # of a line that has no trailing newline.
            cut = line.find(comments)
            if cut >= 0:
                line = line[:cut]
            line = line.strip()
            if not line:
                continue
            # split line, should have 2 or three items
            s = line.split(delimiter)
            if len(s) == 2:
                (u, v) = s
                d = None
            elif len(s) == 3:
                (u, v, d) = s
            else:
                raise TypeError("Failed to read line: %s" % line)

            # convert types
            if nodetype is not None:
                try:
                    u, v = nodetype(u), nodetype(v)
                except Exception:
                    raise TypeError(
                        "Failed to convert edge (%s, %s) to type %s"
                        % (u, v, nodetype))
            if d is not None and edgetype is not None:
                try:
                    d = edgetype(d)
                except Exception:
                    raise TypeError(
                        "Failed to convert edge data (%s) to type %s"
                        % (d, edgetype))

            if xgraph:
                G.add_edge(u, v, d)     # XGraph or XDiGraph
            else:
                G.add_edge(u, v)        # Graph or DiGraph
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

    return G

561

def write_gpickle(G, path):
    """
    Write graph object in Python pickle format.

    This will preserve Python objects used as nodes or edges.

    >>> write_gpickle(G,"file.gpickle")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning, so
    the data is flushed to disk; a filehandle passed in by the caller
    is left open.

    See cPickle.

    """
    fh = _get_fh(path, mode='wb')
    try:
        cPickle.dump(G, fh, cPickle.HIGHEST_PROTOCOL)
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

575

def read_gpickle(path):
    """
    Read graph object in Python pickle format

    >>> G=read_gpickle("file.gpickle")

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    Returns whatever object was pickled into the file.

    See cPickle.

    """
    fh = _get_fh(path, 'rb')
    try:
        # SECURITY: unpickling can execute arbitrary code.  Only read
        # pickle files that come from a trusted source.
        return cPickle.load(fh)
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

587 588

def write_yaml(G, path, default_flow_style=False, **kwds):
    """Write graph G in YAML text format to path.

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    default_flow_style and any extra keyword arguments are passed on
    to yaml.dump.

    Raises ImportError if the yaml package is not installed.

    See http://www.yaml.org

    """
    try:
        import yaml
    except ImportError:
        raise ImportError(
            "Import Error: not able to import yaml: http://www.yaml.org ")
    fh = _get_fh(path, mode='w')
    try:
        yaml.dump(G, fh, default_flow_style=default_flow_style, **kwds)
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

602 603

def read_yaml(path):
    """Read graph from YAML format from path.

    path can be a filehandle or a string with the name of the file.
    A file opened here from a file name is closed before returning;
    a filehandle passed in by the caller is left open.

    Returns the object constructed by yaml.load.

    Raises ImportError if the yaml package is not installed.

    See http://www.yaml.org

    """
    try:
        import yaml
    except ImportError:
        raise ImportError(
            "Import Error: not able to import yaml: http://www.yaml.org ")

    fh = _get_fh(path, mode='r')
    try:
        # SECURITY: yaml.load can construct arbitrary Python objects.
        # Only read YAML files that come from a trusted source.
        # NOTE(review): newer PyYAML releases may require an explicit
        # Loader argument here - confirm against the installed version.
        return yaml.load(fh)
    finally:
        # _get_fh returns the caller's own handle unchanged; only close
        # a handle that was opened here from a file name.
        if fh is not path:
            fh.close()

618 619

def _get_fh(path, mode='r'):
    """ Return a file handle for given path.

    Path can be a string or a file handle.

    A string is treated as a file name and opened with the given mode.
    Attempt to uncompress/compress files ending in '.gz' and '.bz2'.

    Any other object that has a 'seek' attribute is assumed to be an
    open file handle and is returned unchanged.

    Raises ValueError if path is neither a string nor a file handle.

    """
    if is_string_like(path):
        if path.endswith('.gz'):
            import gzip
            return gzip.open(path, mode=mode)
        if path.endswith('.bz2'):
            import bz2
            return bz2.BZ2File(path, mode=mode)
        # open() is the documented way to open a file; the file()
        # constructor used previously is deprecated.
        return open(path, mode)
    if hasattr(path, 'seek'):
        return path
    raise ValueError('path must be a string or file handle')

642

def _test_suite():
    """Return the doctest suite for this module.

    The suite is built from tests/io.txt of the networkx package.
    tests/io_yaml.txt is added when the yaml package can be imported.
    """
    import doctest
    doc_files = ['tests/io.txt']
    try:
        import yaml     # imported only to find out whether yaml is installed
        doc_files.append('tests/io_yaml.txt')
        return doctest.DocFileSuite(package='networkx', *doc_files)
    except ImportError:
        return doctest.DocFileSuite('tests/io.txt', package='networkx')

if __name__ == "__main__":
    import os
    import sys
    import unittest
    # The tests are only run on Python 2.4 or later.
    if not sys.version_info[:2] >= (2, 4):
        print("Python version 2.4 or later required for tests (%d.%d detected)." % sys.version_info[:2])
        sys.exit(-1)
    # directory of networkx package (relative to this)
    nxbase = sys.path[0] + os.sep + os.pardir
    # prepend to search path
    sys.path.insert(0, nxbase)
    runner = unittest.TextTestRunner()
    runner.run(_test_suite())

Source Code for Module networkx.io