# duplication.py - functions for generating graphs by duplicating nodes
#
# Copyright 2016-2019 NetworkX developers.
# Copyright (C) 2004-2019 by
# Aric Hagberg <hagberg@lanl.gov>
# Dan Schult <dschult@colgate.edu>
# Pieter Swart <swart@lanl.gov>
#
# This file is part of NetworkX.
#
# NetworkX is distributed under a BSD license; see LICENSE.txt for more
# information.
"""Functions for generating graphs based on the "duplication" method.
These graph generators start with a small initial graph then duplicate
nodes and (partially) duplicate their edges. These functions are
generally inspired by biological networks.
"""
import networkx as nx
from networkx.utils import py_random_state
from networkx.exception import NetworkXError
__all__ = ['partial_duplication_graph', 'duplication_divergence_graph']
[docs]@py_random_state(4)
def partial_duplication_graph(N, n, p, q, seed=None):
"""Returns a random graph using the partial duplication model.
Parameters
----------
N : int
The total number of nodes in the final graph.
n : int
The number of nodes in the initial clique.
p : float
The probability of joining each neighbor of a node to the
duplicate node. Must be a number in the between zero and one,
inclusive.
q : float
The probability of joining the source node to the duplicate
node. Must be a number in the between zero and one, inclusive.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Notes
-----
A graph of nodes is grown by creating a fully connected graph
of size `n`. The following procedure is then repeated until
a total of `N` nodes have been reached.
1. A random node, *u*, is picked and a new node, *v*, is created.
2. For each neighbor of *u* an edge from the neighbor to *v* is created
with probability `p`.
3. An edge from *u* to *v* is created with probability `q`.
This algorithm appears in [1].
This implementation allows the possibility of generating
disconnected graphs.
References
----------
.. [1] Knudsen Michael, and Carsten Wiuf. "A Markov chain approach to
randomly grown graphs." Journal of Applied Mathematics 2008.
<https://doi.org/10.1155/2008/190836>
"""
if p < 0 or p > 1 or q < 0 or q > 1:
msg = "partial duplication graph must have 0 <= p, q <= 1."
raise NetworkXError(msg)
if n > N:
raise NetworkXError("partial duplication graph must have n <= N.")
G = nx.complete_graph(n)
for new_node in range(n, N):
# Pick a random vertex, u, already in the graph.
src_node = seed.randint(0, new_node - 1)
# Add a new vertex, v, to the graph.
G.add_node(new_node)
# For each neighbor of u...
for neighbor_node in list(nx.all_neighbors(G, src_node)):
# Add the neighbor to v with probability p.
if seed.random() < p:
G.add_edge(new_node, neighbor_node)
# Join v and u with probability q.
if seed.random() < q:
G.add_edge(new_node, src_node)
return G
[docs]@py_random_state(2)
def duplication_divergence_graph(n, p, seed=None):
"""Returns an undirected graph using the duplication-divergence model.
A graph of `n` nodes is created by duplicating the initial nodes
and retaining edges incident to the original nodes with a retention
probability `p`.
Parameters
----------
n : int
The desired number of nodes in the graph.
p : float
The probability for retaining the edge of the replicated node.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Returns
-------
G : Graph
Raises
------
NetworkXError
If `p` is not a valid probability.
If `n` is less than 2.
Notes
-----
This algorithm appears in [1].
This implementation disallows the possibility of generating
disconnected graphs.
References
----------
.. [1] I. Ispolatov, P. L. Krapivsky, A. Yuryev,
"Duplication-divergence model of protein interaction network",
Phys. Rev. E, 71, 061911, 2005.
"""
if p > 1 or p < 0:
msg = "NetworkXError p={0} is not in [0,1].".format(p)
raise nx.NetworkXError(msg)
if n < 2:
msg = 'n must be greater than or equal to 2'
raise nx.NetworkXError(msg)
G = nx.Graph()
# Initialize the graph with two connected nodes.
G.add_edge(0, 1)
i = 2
while i < n:
# Choose a random node from current graph to duplicate.
random_node = seed.choice(list(G))
# Make the replica.
G.add_node(i)
# flag indicates whether at least one edge is connected on the replica.
flag = False
for nbr in G.neighbors(random_node):
if seed.random() < p:
# Link retention step.
G.add_edge(i, nbr)
flag = True
if not flag:
# Delete replica if no edges retained.
G.remove_node(i)
else:
# Successful duplication.
i += 1
return G