networkx.readwrite.adjlist

1 """ 2 Read and write NetworkX graphs. 3 4 Note that NetworkX graphs can contain any hashable Python object as 5 node (not just integers and strings). So writing a NetworkX graph 6 as a text file may not always be what you want: see write_gpickle 7 and read_gpickle for that case. 8 9 This module provides the following: 10 11 Adjacency list with single line per node: 12 Useful for connected or unconnected graphs without edge data. 13 14 write_adjlist(G, path) 15 G=read_adjlist(path) 16 17 Adjacency list with multiple lines per node: 18 Useful for connected or unconnected graphs with or without edge data. 19 20 write_multiline_adjlist(G, path) 21 read_multiline_adjlist(path) 22 23 """ 24 __author__ = """Aric Hagberg (hagberg@lanl.gov)\nDan Schult (dschult@colgate.edu)""" 25 __date__ = """""" 26 __credits__ = """""" 27 __revision__ = "" 28 # Copyright (C) 2004-2007 by 29 # Aric Hagberg <hagberg@lanl.gov> 30 # Dan Schult <dschult@colgate.edu> 31 # Pieter Swart <swart@lanl.gov> 32 # Distributed under the terms of the GNU Lesser General Public License 33 # http://www.gnu.org/copyleft/lesser.html 34 35 import cPickle 36 import codecs 37 import locale 38 import string 39 import sys 40 import time 41 42 from networkx.utils import is_string_like,_get_fh 43 import networkx 44

def write_multiline_adjlist(G, path, delimiter=' ', comments='#'):
    """
    Write the graph G in multiline adjacency list format to the file
    or file handle path.

    See read_multiline_adjlist for file format details.

    >>> write_multiline_adjlist(G,"file.adjlist")

    path can be a filehandle or a string with the name of the file.

    >>> fh=open("file.adjlist","w")
    >>> write_multiline_adjlist(G,fh)

    Filenames ending in .gz or .bz2 will be compressed.

    >>> write_multiline_adjlist(G,"file.adjlist.gz")

    delimiter (default=' ') separates the entries on a line and
    comments (default='#') prefixes each of the header lines.

    Three header lines are written first: the command line (sys.argv),
    the current GMT time and G.name.  Every node then gets one line
    holding the node and the number of edge lines that follow it, and
    each of those edge lines holds the neighbor and, when the edge has
    data that is not None, the data.  For undirected graphs an edge is
    listed only once, under whichever endpoint comes first in G.nodes().

    The file will use the default text encoding on your system.
    It is possible to write files in other encodings by opening
    the file with the codecs module.  See doc/examples/unicode.py
    for hints.

    >>> import codecs
    >>> fh=codecs.open("file.adjlist","w",encoding='utf-8') # use utf-8 encoding
    >>> write_multiline_adjlist(G,fh)

    """
    def _text(obj):
        # String-like objects are written unchanged; anything else is
        # converted with str().
        if is_string_like(obj):
            return obj
        return str(obj)

    fh = _get_fh(path, mode='w')

    # Header: command line, timestamp and graph name, each as a comment.
    fh.write("%s\n" % (comments + " " + ' '.join(sys.argv)))
    fh.write(comments + " GMT %s\n" % (time.asctime(time.gmtime())))
    fh.write(comments + " %s\n" % (G.name))

    directed = G.is_directed()

    # Nodes already written; used to avoid listing an undirected edge
    # a second time from its other endpoint.
    seen = set()
    for s in G.nodes():
        edges = [edge for edge in G.edges_iter(s) if edge[1] not in seen]
        fh.write(_text(s) + delimiter)
        fh.write("%i\n" % (len(edges)))
        for edge in edges:
            t = edge[1]
            if len(edge) == 2:      # Graph or DiGraph: no edge data
                d = None
            else:                   # XGraph or XDiGraph
                d = edge[2]         # Note: could still be None
            if d is None:
                fh.write(_text(t) + '\n')
            else:
                fh.write(_text(t) + delimiter)
                fh.write(_text(d) + "\n")
        if not directed:
            seen.add(s)

113

def read_multiline_adjlist(path, comments="#", delimiter=' ',
                           create_using=None,
                           nodetype=None, edgetype=None):
    """Read graph in multi-line adjacency list format from path.

    >>> G=read_multiline_adjlist("file.adjlist")

    path can be a filehandle or a string with the name of the file.

    >>> fh=open("file.adjlist")
    >>> G=read_multiline_adjlist(fh)

    Filenames ending in .gz or .bz2 will be uncompressed.

    >>> G=read_multiline_adjlist("file.adjlist.gz")

    nodetype is an optional function to convert node strings to nodetype

    For example

    >>> G=read_multiline_adjlist("file.adjlist", nodetype=int)

    will attempt to convert all nodes to integer type

    Since nodes must be hashable, the function nodetype must return hashable
    types (e.g. int, float, str, frozenset - or tuples of those, etc.)

    edgetype is a function to convert edge data strings to edgetype

    >>> G=read_multiline_adjlist("file.adjlist", edgetype=int)

    create_using is an optional networkx graph type, the default is
    Graph(), a simple undirected graph.  A graph passed as create_using
    is cleared before it is filled.

    >>> G=read_multiline_adjlist("file.adjlist", create_using=DiGraph())

    The comments character (default='#') starts a comment; on a
    "node degree" line everything from it to the end of the line is
    ignored.  Neighbor lines are not scanned for comments.

    The entries are separated by delimiter (default=' ').
    If whitespace is significant in node or edge labels you should use
    some other delimiter such as a tab or other symbol.

    Example multiline adjlist file format::

     # source target for Graph or DiGraph
     a 2
     b
     c
     d 1
     e

    or

     # source target for XGraph or XDiGraph with edge data
     a 2
     b edge-ab-data
     c edge-ac-data
     d 1
     e edge-de-data

    Returns the graph that was filled.

    Raises TypeError if create_using cannot be cleared, if a line cannot
    be parsed, if the file ends before all announced neighbor lines have
    been read, or if a nodetype/edgetype conversion fails.

    Reading the file will use the default text encoding on your system.
    It is possible to read files with other encodings by opening
    the file with the codecs module.  See doc/examples/unicode.py
    for hints.

    >>> import codecs
    >>> fh=codecs.open("file.adjlist", encoding='utf-8') # use utf-8 encoding
    >>> G=read_multiline_adjlist(fh)
    """
    if create_using is None:
        G = networkx.Graph()
    else:
        try:
            G = create_using
            G.clear()
        except Exception:
            raise TypeError("Input graph is not a networkx graph type")

    # is this a XGraph or XDiGraph?  Edge data is only stored for those.
    xgraph = hasattr(G, 'allow_multiedges')

    inp = _get_fh(path)

    for line in inp:
        # Drop a trailing comment.  find() returns -1 when there is no
        # comment marker, and slicing with -1 would chop the last character
        # of the line, so only slice when a marker was actually found.
        cut = line.find(comments)
        if cut >= 0:
            line = line[:cut]
        line = line.strip()
        if not line:
            continue
        try:
            (u, deg) = line.split(delimiter)
            deg = int(deg)
        except (TypeError, ValueError):
            raise TypeError("Failed to read node and degree on line (%s)" % line)
        if nodetype is not None:
            try:
                u = nodetype(u)
            except Exception:
                raise TypeError("Failed to convert node (%s) to type %s"
                                % (u, nodetype))
        G.add_node(u)
        # The next deg lines each describe one neighbor of u.
        for i in range(deg):
            try:
                line = inp.next().strip()   # Python 2 iterator protocol
            except StopIteration:
                # File ended before all announced neighbors were read.
                raise TypeError("Failed to find neighbor for node (%s)" % (u,))
            vlist = line.split(delimiter)
            if len(vlist) == 1:
                v = vlist[0]
                d = None
            elif len(vlist) == 2:
                (v, d) = vlist
            else:
                raise TypeError("Failed to read line: %s" % vlist)
            if nodetype is not None:
                try:
                    v = nodetype(v)
                except Exception:
                    raise TypeError("Failed to convert node (%s) to type %s"
                                    % (v, nodetype))
            if xgraph:
                if d is not None and edgetype is not None:
                    try:
                        d = edgetype(d)
                    except Exception:
                        raise TypeError(
                            "Failed to convert edge data (%s) to type %s"
                            % (d, edgetype))
                G.add_edge(u, v, d)
            else:
                # Plain Graph/DiGraph: any edge data on the line is ignored.
                G.add_edge(u, v)

    return G

250 251

def write_adjlist(G, path, comments="#", delimiter=' '):
    """Write graph G in single-line adjacency-list format to path.

    See read_adjlist for file format details.

    >>> write_adjlist(G, "file.adjlist")

    path can be a filehandle or a string with the name of the file.

    >>> fh=open("file.adjlist","w")
    >>> write_adjlist(G, fh)

    Filenames ending in .gz or .bz2 will be compressed.

    >>> write_adjlist(G, "file.adjlist.gz")

    comments (default='#') prefixes each of the header lines and
    delimiter (default=' ') is written after every entry on a line.

    Three header lines are written first: the command line (sys.argv),
    the current GMT time and G.name.  Every node then gets one line
    holding the node followed by its neighbors.  For undirected graphs
    an edge is listed only once, and if G.multiedges is true a neighbor
    is repeated once per item returned by G.get_edge.

    The file will use the default text encoding on your system.
    It is possible to write files in other encodings by opening
    the file with the codecs module.  See doc/examples/unicode.py
    for hints.

    >>> import codecs
    >>> fh=codecs.open("file.adjlist","w",encoding='utf-8') # use utf-8 encoding
    >>> write_adjlist(G,fh)

    Does not handle data in XGraph or XDiGraph, use 'write_edgelist'
    or 'write_multiline_adjlist'
    """
    def _text(obj):
        # String-like objects are written unchanged; anything else is
        # converted with str().
        if is_string_like(obj):
            return obj
        return str(obj)

    fh = _get_fh(path, mode='w')

    # Header: command line, timestamp and graph name, each as a comment.
    fh.write("%s\n" % (comments + " " + ' '.join(sys.argv)))
    fh.write(comments + " GMT %s\n" % (time.asctime(time.gmtime())))
    fh.write(comments + " %s\n" % (G.name))

    # Graph classes without a multiedges attribute are treated as
    # not having multiedges.
    multiedges = getattr(G, 'multiedges', False)

    directed = G.is_directed()

    # (source, target) pairs already written; used to avoid listing an
    # undirected edge a second time from its other endpoint.
    written = set()
    for s in G.nodes():
        fh.write(_text(s) + delimiter)
        for t in G.neighbors(s):
            if not directed:
                if (t, s) in written:
                    continue
                written.add((s, t))
            if multiedges:
                # One entry per parallel edge; the edge data is not written.
                for d in G.get_edge(s, t):
                    fh.write(_text(t) + delimiter)
            else:
                fh.write(_text(t) + delimiter)
        fh.write("\n")

316 317

def read_adjlist(path, comments="#", delimiter=' ',
                 create_using=None, nodetype=None):
    """Read graph in single line adjacency list format from path.

    >>> G=read_adjlist("file.adjlist")

    path can be a filehandle or a string with the name of the file.

    >>> fh=open("file.adjlist")
    >>> G=read_adjlist(fh)

    Filenames ending in .gz or .bz2 will be uncompressed.

    >>> G=read_adjlist("file.adjlist.gz")

    nodetype is an optional function to convert node strings to nodetype

    For example

    >>> G=read_adjlist("file.adjlist", nodetype=int)

    will attempt to convert all nodes to integer type

    Since nodes must be hashable, the function nodetype must return hashable
    types (e.g. int, float, str, frozenset - or tuples of those, etc.)

    create_using is an optional networkx graph type, the default is
    Graph(), a simple undirected graph.  A graph passed as create_using
    is cleared before it is filled.

    >>> G=read_adjlist("file.adjlist", create_using=DiGraph())

    Does not handle edge data: use 'read_edgelist' or 'read_multiline_adjlist'

    The comments character (default='#') starts a comment; everything
    from it to the end of the line is ignored.

    The entries are separated by delimiter (default=' ').
    If whitespace is significant in node or edge labels you should use
    some other delimiter such as a tab or other symbol.

    Example adjlist file format::

     # source target
     a b c
     d e

    Returns the graph that was filled.

    Raises TypeError if create_using cannot be cleared or if a nodetype
    conversion fails.
    """
    if create_using is None:
        G = networkx.Graph()
    else:
        try:
            G = create_using
            G.clear()
        except Exception:
            raise TypeError("Input graph is not a networkx graph type")

    fh = _get_fh(path)

    for line in fh:
        # Drop a trailing comment.  find() returns -1 when there is no
        # comment marker, and slicing with -1 would chop the last character
        # of the line, so only slice when a marker was actually found.
        cut = line.find(comments)
        if cut >= 0:
            line = line[:cut]
        line = line.strip()
        if not line:
            continue
        # First entry is the source node, the rest are its neighbors.
        vlist = line.split(delimiter)
        u = vlist.pop(0)
        # convert types
        if nodetype is not None:
            try:
                u = nodetype(u)
            except Exception:
                raise TypeError("Failed to convert node (%s) to type %s"
                                % (u, nodetype))
        G.add_node(u)
        if nodetype is not None:
            try:
                vlist = [nodetype(v) for v in vlist]
            except Exception:
                raise TypeError("Failed to convert nodes (%s) to type %s"
                                % (','.join(vlist), nodetype))
        for v in vlist:
            G.add_edge(u, v)
    return G

398 399 400

def _test_suite():
    """Return the doctest suite built from tests/readwrite/adjlist.txt."""
    import doctest
    # The path is resolved relative to the networkx package.
    return doctest.DocFileSuite('tests/readwrite/adjlist.txt',
                                package='networkx')

if __name__ == "__main__":
    import os
    import sys
    import unittest
    if sys.version_info[:2] < (2, 4):
        # Single-argument parenthesised form prints the same text whether
        # print is a statement or a function.
        print("Python version 2.4 or later required for tests (%d.%d detected)." % sys.version_info[:2])
        sys.exit(-1)
    # directory of networkx package (relative to this)
    # os.path.join is used instead of string concatenation because
    # sys.path[0] may be the empty string, in which case concatenating
    # os.sep would point at the filesystem root rather than the parent
    # directory.
    nxbase = os.path.join(sys.path[0], os.pardir)
    sys.path.insert(0, nxbase)  # prepend to search path
    unittest.TextTestRunner().run(_test_suite())

Source Code for Module networkx.readwrite.adjlist