Source code for networkx.algorithms.community.local

"""
Local Community Detection Algorithms

Local Community Detection (LCD) aims to detected one or a few communities
starting from certain source nodes in the network. This differs from Global
Communty Detection (GCD), which aims to partition an entire network into
communities.

LCD is often useful when only a portion of the graph is known or the
graph is large enough that GCD is infeasable

[1]_ Gives a good introduction and overview of LCD

References
----------
.. [1] Baltsou, Georgia, Konstantinos Christopoulos, and Konstantinos Tsichlas.
   Local community detection: A survey. IEEE Access 10 (2022): 110701-110726.
   https://doi.org/10.1109/ACCESS.2022.3213980


"""

__all__ = ["greedy_source_expansion"]


def _clauset_greedy_source_expansion(G, *, source, cutoff=None):
    if cutoff is None:
        cutoff = float("inf")
    C = {source}
    B = {source}
    U = G[source].keys() - C
    T = {frozenset([node, nbr]) for node in B for nbr in G.neighbors(node)}
    I = {edge for edge in T if all(node in C for node in edge)}

    R_value = 0
    while len(C) < cutoff:
        if not U:
            break

        max_R = 0
        best_node = None
        best_node_B = best_node_T = best_node_I = set()

        for v in U:
            R_tmp, B_tmp, T_tmp, I_tmp = _calculate_local_modularity_for_candidate(
                G, v, C, B, T, I
            )
            if R_tmp > max_R:
                max_R = R_tmp
                best_node = v
                best_node_B = B_tmp
                best_node_T = T_tmp
                best_node_I = I_tmp

        C = C | {best_node}
        U.update(G[best_node].keys() - C)
        U.remove(best_node)
        B = best_node_B
        T = best_node_T
        I = best_node_I
        if max_R < R_value:
            break
        R_value = max_R

    return C


def _calculate_local_modularity_for_candidate(G, v, C, B, T, I):
    """
    Compute the local modularity R and updated variables when adding node v to the community.

    Parameters
    ----------
    G : NetworkX graph
        The input graph.
    v : node
        The candidate node to add to the community.
    C : set
        The current set of community nodes.
    B : set
        The current set of boundary nodes.
    T : set of frozenset
        The current set of boundary edges.
    I : set of frozenset
        The current set of internal boundary edges.

    Returns
    -------
    R_tmp : float
        The local modularity after adding node v.
    B_tmp : set
        The updated set of boundary nodes.
    T_tmp : set of frozenset
        The updated set of boundary edges.
    I_tmp : set of frozenset
        The updated set of internal boundary edges.
    """
    C_tmp = C | {v}
    B_tmp = B.copy()
    T_tmp = T.copy()
    I_tmp = I.copy()
    removed_B_nodes = set()

    # Update boundary nodes and edges
    for nbr in G[v]:
        if nbr not in C_tmp:
            # v has nbrs not in the community, so it remains a boundary node
            B_tmp.add(v)
            # Add edge between v and nbr to boundary edges
            T_tmp.add(frozenset([v, nbr]))

        if nbr in B:
            # Check if nbr should be removed from boundary nodes
            # Go through nbrs nbrs to see if it is still a boundary node
            nbr_still_in_B = any(nbr_nbr not in C_tmp for nbr_nbr in G[nbr])
            if not nbr_still_in_B:
                B_tmp.remove(nbr)
                removed_B_nodes.add(nbr)

        if nbr in C_tmp:
            # Add edge between v and nbr to internal edges
            I_tmp.add(frozenset([v, nbr]))

    # Remove edges no longer in the boundary
    for removed_node in removed_B_nodes:
        for removed_node_nbr in G[removed_node]:
            if removed_node_nbr not in B_tmp:
                T_tmp.discard(frozenset([removed_node_nbr, removed_node]))
                I_tmp.discard(frozenset([removed_node_nbr, removed_node]))

    R_tmp = len(I_tmp) / len(T_tmp) if len(T_tmp) > 0 else 1
    return R_tmp, B_tmp, T_tmp, I_tmp


ALGORITHMS = {
    "clauset": _clauset_greedy_source_expansion,
}


[docs] def greedy_source_expansion(G, *, source, cutoff=None, method="clauset"): r"""Find the local community around a source node. Find the local community around a source node using Greedy Source Expansion. Greedy Source Expansion generally identifies a local community starting from the source node and expands it based on the criteria of the choosen algorithm. The algorithm is specified with the `method` keyword argument. * `"clauset"` [1]_ uses local modularity gain to determine local communities. The algorithm adds nbring nodes that maximize local modularity to the community iteratively, stopping when no additional nodes improve the modularity or when a predefined cutoff is reached. Local modularity measures the density of edges within a community relative to the total graph. By focusing on local modularity, the algorithm efficiently uncovers communities around a specific node without requiring global optimization over the entire graph. The algorithm assumes that the graph $G$ consists of a known community $C$ and an unknown set of nodes $U$, which are adjacent to $C$ . The boundary of the community $B$, consists of nodes in $C$ that have at least one nbr in $U$. Mathematically, the local modularity is expressed as: .. math:: R = \frac{I}{T} where $T$ is the number of edges with one or more endpoints in $B$, and $I$ is the number of those edges with neither endpoint in $U$. Parameters ---------- G : NetworkX graph The input graph. source : node The source node from which the community expansion begins. cutoff : int, optional (default=None) The maximum number of nodes to include in the community. If None, the algorithm expands until no further modularity gain can be made. method : string, optional (default='clauset') The alogirhtm to use to carry out greedy source expansion. Supported options: 'clauset'. Other inputs produce a ValueError Returns ------- set A set of nodes representing the local community around the source node. Examples -------- >>> G = nx.karate_club_graph() >>> nx.community.greedy_source_expansion(G, source=16) {16, 0, 4, 5, 6, 10} Notes ----- This algorithm is designed for detecting local communities around a specific node, which is useful for large networks where global community detection is computationally expensive. The result of the algorithm may vary based on the structure of the graph, the choice of the source node, and the presence of ties between nodes during the greedy expansion process. References ---------- .. [1] Clauset, Aaron. Finding local community structure in networks. Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 72, no. 2 (2005): 026132. https://arxiv.org/pdf/physics/0503036 """ try: algo = ALGORITHMS[method] except KeyError as e: raise ValueError(f"{method} is not a valid choice for an algorithm.") from e return algo(G, source=source, cutoff=cutoff)