From 5b9eff1526dce2943cb38257b9bc17d166690842 Mon Sep 17 00:00:00 2001 From: Kevin Brown Date: Mon, 29 Apr 2019 10:35:18 -0700 Subject: [PATCH 1/2] Modified code to work with edge weights. --- LICENSE | 16 +- README.rst | 5 +- modularity_maximization/__init__.py | 13 +- modularity_maximization/_divide.py | 17 +- modularity_maximization/modularity.py | 209 ++++++++++++++++++ .../{community_newman.py => spectralopt.py} | 40 ++-- 6 files changed, 257 insertions(+), 43 deletions(-) create mode 100644 modularity_maximization/modularity.py rename modularity_maximization/{community_newman.py => spectralopt.py} (69%) diff --git a/LICENSE b/LICENSE index c0083cf..0cf07d2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,7 +1,15 @@ -Copyright 2018 Zhiya Zuo +Copyright 2018 Zhiya Zuo, Kevin S. Brown -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all copies or substantial portions +of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO +EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.rst b/README.rst index 3bf0836..5e5f726 100644 --- a/README.rst +++ b/README.rst @@ -14,9 +14,12 @@ Specifically, ``big_10_football_directed.gml`` is compiled by myself to test com Change log: +- 04-29-2019 (Kevin S. Brown) + Pushed changes that allow working with weighted graphs (either directed or undirected). + - 02-23-2018 Test on Python 3 - 10-20-2017 Updated python codes to use NetworkX 2 APIs. See https://networkx.github.io/documentation/stable/release/release_2.0.html. - Later in the day, I added a wrapper function to retrieve the largest eigenvalue and vector for 2x2 matrices since scipy.sparse.linalg.eigs do not work in that case. + Later in the day, I added a wrapper function to retrieve the largest eigenvalue and vector for 2x2 matrices since scipy.sparse.linalg.eigs do not work in that case. 
diff --git a/modularity_maximization/__init__.py b/modularity_maximization/__init__.py index 4f56aba..852515b 100644 --- a/modularity_maximization/__init__.py +++ b/modularity_maximization/__init__.py @@ -1,14 +1,14 @@ # -*- coding: utf-8 -*- -from modularity_maximization.community_newman import partition -from modularity_maximization.utils import get_modularity +from modularity_maximization.spectralopt import partition +from modularity_maximization.modularity import modularity -__version__ = '0.0.1rc4' -__author__ = """Zhiya Zuo""" -__email__ = """zhiyazuo@gmail.com""" +__version__ = '0.7.0' +__author__ = """Zhiya Zuo and Kevin S. Brown""" +__email__ = """zhiyazuo@gmail.com and kevin.brown@oregonstate.edu""" __license__ = """ -Copyright 2018 Zhiya Zuo +Copyright 2018 Zhiya Zuo and Kevin S. Brown Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: @@ -16,4 +16,3 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
""" - diff --git a/modularity_maximization/_divide.py b/modularity_maximization/_divide.py index d4286fa..2f03d02 100644 --- a/modularity_maximization/_divide.py +++ b/modularity_maximization/_divide.py @@ -3,7 +3,7 @@ import numpy as np import networkx as nx from scipy import sparse -from modularity_maximization import utils +from modularity_maximization import modularity def _divide(network, community_dict, comm_index, B, refine=False): ''' @@ -25,11 +25,11 @@ def _divide(network, community_dict, comm_index, B, refine=False): comm_nodes = tuple(u for u in community_dict \ if community_dict[u] == comm_index) - B_hat_g = utils.get_mod_matrix(network, comm_nodes, B) + B_hat_g = modularity.get_mod_matrix(network, comm_nodes, B) # compute the top eigenvector u₁ and β₁ if B_hat_g.shape[0] < 3: - beta_s, u_s = utils.largest_eig(B_hat_g) + beta_s, u_s = modularity.largest_eig(B_hat_g) else: beta_s, u_s = sparse.linalg.eigs(B_hat_g, k=1, which='LR') u_1 = u_s[:, 0] @@ -39,7 +39,7 @@ def _divide(network, community_dict, comm_index, B, refine=False): s = sparse.csc_matrix(np.asmatrix([[1 if u_1_i > 0 else -1] for u_1_i in u_1])) if refine: improve_modularity(network, comm_nodes, s, B) - delta_modularity = utils._get_delta_Q(B_hat_g, s) + delta_modularity = modularity._get_delta_Q(B_hat_g, s) if delta_modularity > 0: g1_nodes = np.array([comm_nodes[i] \ for i in range(u_1.shape[0]) \ @@ -53,6 +53,7 @@ def _divide(network, community_dict, comm_index, B, refine=False): # indivisble, return None return None, None + def improve_modularity(network, comm_nodes, s, B): ''' Fine tuning of the initial division from `_divide` @@ -71,7 +72,7 @@ def improve_modularity(network, comm_nodes, s, B): ''' # iterate until no increment of Q - B_hat_g = utils.get_mod_matrix(network, comm_nodes, B) + B_hat_g = modularity.get_mod_matrix(network, comm_nodes, B) while True: unmoved = list(comm_nodes) # node indices to be moved @@ -81,12 +82,12 @@ def improve_modularity(network, comm_nodes, s, B): # 
keep moving until none left while len(unmoved) > 0: # init Q - Q0 = utils._get_delta_Q(B_hat_g, s) + Q0 = modularity._get_delta_Q(B_hat_g, s) scores = np.zeros(len(unmoved)) for k_index in range(scores.size): k = comm_nodes.index(unmoved[k_index]) s[k, 0] = -s[k, 0] - scores[k_index] = utils._get_delta_Q(B_hat_g, s) - Q0 + scores[k_index] = modularity._get_delta_Q(B_hat_g, s) - Q0 s[k, 0] = -s[k, 0] _j = np.argmax(scores) j = comm_nodes.index(unmoved[_j]) @@ -112,6 +113,6 @@ def improve_modularity(network, comm_nodes, s, B): delta_modularity = 0 else: delta_modularity = node_improvement[max_index] - # Stop if ΔQ <= 0 + # Stop if ΔQ <= 0 if delta_modularity <= 0: break diff --git a/modularity_maximization/modularity.py b/modularity_maximization/modularity.py new file mode 100644 index 0000000..4fd61c8 --- /dev/null +++ b/modularity_maximization/modularity.py @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +import numpy as np +import networkx as nx +from scipy import sparse +from scipy.linalg import eig +from itertools import product + + +def transform_net_and_part(network,partition): + ''' + Accepts an input network and a community partition (keys are nodes, + values are community ID) and returns a version of the network and + partition with nodes in the range 0,...,len(G.nodes())-1. This + lets you directly map edges to elements of the modularity matrix. + + Returns the modified network and partition. 
+ ''' + network = nx.convert_node_labels_to_integers(network, first_label=0, label_attribute="node_name") + node_to_name = nx.get_node_attributes(network, 'node_name') + # reverse the node_name dict to flip the partition + name_to_node = {v:k for k,v in node_to_name.items()} + int_partition = {} + for k in partition: + int_partition[name_to_node[k]] = partition[k] + return network,int_partition + + +def reverse_partition(partition): + ''' + Accepts an input graph partition in the form node:community_id and returns + a dictionary of the form community_id:[node_1,node_2,...]. + ''' + reverse_partition = {} + for p in partition: + if partition[p] in reverse_partition: + reverse_partition[partition[p]].append(p) + else: + reverse_partition[partition[p]] = [p] + return reverse_partition + + +def modularity(network, partition): + ''' + Computes the modularity; works for Directed and Undirected Graphs, both + unweighted and weighted. + ''' + # put the network and partition into integer node format + network,partition = transform_net_and_part(network,partition) + # get the modularity matrix + Q = get_base_modularity_matrix(network) + if type(network) == nx.Graph: + norm_fac = 2.*(network.number_of_edges()) + if nx.is_weighted(network): + # 2*0.5*sum_{ij} A_{ij} + norm_fac = nx.to_scipy_sparse_matrix(network).sum() + elif type(network) == nx.DiGraph: + norm_fac = 1.*network.number_of_edges() + if nx.is_weighted(network): + # sum_{ij} A_{ij} + norm_fac = nx.to_scipy_sparse_matrix(network).sum() + else: + print('Invalid graph type') + raise TypeError + # reverse the partition dictionary + rev_part = reverse_partition(partition) + # get the list of all within-community pairs + pairs = [] + for p in rev_part: + for i,j in product(rev_part[p],rev_part[p]): + pairs.append((i,j)) + # now sum up all the appropriate values + return sum([Q[x] for x in pairs])/norm_fac + + +def get_base_modularity_matrix(network): + ''' + Obtain the modularity matrix for the whole network. 
Assumes any edge weights + use the key 'weight' in the edge attribute. + + Parameters + ---------- + network : nx.Graph or nx.DiGraph + The network of interest + + Returns + ------- + np.matrix + The modularity matrix for `network` + + Raises + ------ + TypeError + When the input `network` does not fit either nx.Graph or nx.DiGraph + ''' + + if type(network) == nx.Graph: + if nx.is_weighted(network): + return sparse.csc_matrix(nx.modularity_matrix(network,weight='weight')) + return sparse.csc_matrix(nx.modularity_matrix(network)) + elif type(network) == nx.DiGraph: + if nx.is_weighted(network): + return sparse.csc_matrix(nx.directed_modularity_matrix(network,weight='weight')) + return sparse.csc_matrix(nx.directed_modularity_matrix(network)) + else: + raise TypeError('Graph type not supported. Use either nx.Graph or nx.Digraph') + + +def _get_delta_Q(X, a): + ''' + Calculate the delta modularity + .. math:: + \Delta Q = s^T \cdot \hat{B}_{g} \cdot s + .. math:: \Delta Q = s^T \cdot \hat{B}_{g} \cdot s + + Parameters + ---------- + X : np.matrix + B_hat_g + a : np.matrix + s, which is the membership vector + + Returns + ------- + float + The corresponding :math:`\Delta Q` + ''' + + delta_Q = (a.T.dot(X)).dot(a) + return delta_Q[0,0] + + +def get_mod_matrix(network, comm_nodes=None, B=None): + ''' + This function computes the modularity matrix + for a specific group in the network. + (a.k.a., generalized modularity matrix) + + Specifically, + .. math:: + B^g_{i,j} = B_ij - \delta_{ij} \sum_{k \in g} B_ik + m = \abs[\Big]{E} + B_ij = A_ij - \dfrac{k_i k_j}{2m} + OR... 
+ B_ij = A_ij - \frac{k_i^{in} k_j^{out}}{m} + + When `comm_nodes` is None or all nodes in `network`, this reduces to :math:`B` + + Parameters + ---------- + network : nx.Graph or nx.DiGraph + The network of interest + comm_nodes : iterable (list, np.array, or tuple) + List of nodes that defines a community + B : np.matrix + Modularity matrix of `network` + + Returns + ------- + np.matrix + The modularity of `comm_nodes` within `network` + ''' + + if comm_nodes is None: + comm_nodes = list(network) + return get_base_modularity_matrix(network) + + if B is None: + B = get_base_modularity_matrix(network) + + # subset of mod matrix in g + indices = [list(network).index(u) for u in comm_nodes] + B_g = B[indices, :][:, indices] + #print 'Type of `B_g`:', type(B_g) + + # B^g_(i,j) = B_ij - δ_ij * ∑_(k∈g) B_ik + # i, j ∈ g + B_hat_g = np.zeros((len(comm_nodes), len(comm_nodes)), dtype=float) + + # ∑_(k∈g) B_ik + B_g_rowsum = np.asarray(B_g.sum(axis=1))[:, 0] + if type(network) == nx.Graph: + B_g_colsum = np.copy(B_g_rowsum) + elif type(network) == nx.DiGraph: + B_g_colsum = np.asarray(B_g.sum(axis=0))[0, :] + + for i in range(B_hat_g.shape[0]): + for j in range(B_hat_g.shape[0]): + if i == j: + B_hat_g[i,j] = B_g[i,j] - 0.5 * (B_g_rowsum[i] + B_g_colsum[i]) + else: + B_hat_g[i,j] = B_g[i,j] + + if type(network) == nx.DiGraph: + B_hat_g = B_hat_g + B_hat_g.T + + return sparse.csc_matrix(B_hat_g) + + +def largest_eig(A): + ''' + A wrapper over `scipy.linalg.eig` to produce + largest eigval and eigvector for A when A.shape is small + ''' + vals, vectors = eig(A.todense()) + real_indices = [idx for idx, val in enumerate(vals) if not bool(val.imag)] + vals = [vals[i].real for i in range(len(real_indices))] + vectors = [vectors[i] for i in range(len(real_indices))] + max_idx = np.argsort(vals)[-1] + return np.asarray([vals[max_idx]]), np.asarray([vectors[max_idx]]).T diff --git a/modularity_maximization/community_newman.py b/modularity_maximization/spectralopt.py similarity
index 69% rename from modularity_maximization/community_newman.py rename to modularity_maximization/spectralopt.py index da7cbeb..9355081 100644 --- a/modularity_maximization/community_newman.py +++ b/modularity_maximization/spectralopt.py @@ -3,15 +3,14 @@ import numpy as np import networkx as nx from collections import deque -from modularity_maximization import utils, _divide +from modularity_maximization import modularity, _divide def partition(network, refine=True): ''' Cluster a network into several modules using modularity maximization by spectral methods. - Supports directed and undirected networks. - Edge weights are ignored + Supports directed and undirected networks, with weighted or unweighted edges. See: @@ -40,10 +39,7 @@ def partition(network, refine=True): network = nx.convert_node_labels_to_integers(network, first_label=1, label_attribute="node_name") node_name = nx.get_node_attributes(network, 'node_name') - ## only support unweighted network - nx.set_edge_attributes(G=network, name='weight', values={edge:1 for edge in network.edges}) - - B = utils.get_base_modularity_matrix(network) + B = modularity.get_base_modularity_matrix(network) ## set flags for divisibility of communities ## initial community is divisible @@ -71,30 +67,28 @@ def partition(network, refine=True): ## add g1, g2 to tree and divisible list comm_counter += 1 - #community_tree.create_node(comm_counter, "%d" %comm_counter,\ - # parent = parent, data = g1_nodes) divisible_community.append(comm_counter) ## update community for u in g1: community_dict[u] = comm_counter - #community_tree.create_node(comm_counter, "%d" %comm_counter,\ - # parent = parent, data = list(g2)) comm_counter += 1 divisible_community.append(comm_counter) ## update community for u in g2: community_dict[u] = comm_counter - ''' - print '------' - community_tree.show() - partition = [] - for comm_index in set(community_dict.values()): - print comm_index - partition.append(set([node_name[i] for i in community_dict 
if community_dict[i]==comm_index])) - print sorted(list(partition[-1])) - print 'Modularity: ', utils.get_modularity(network, community_dict) - ''' - - return {node_name[u]: community_dict[u] for u in network} + # corrects partition numbering to be in 0,...,M-1 (matches python implementation + # of the Louvain algorithm), and restore names of nodes + # old to new numbering + old_to_new = {} + new_val = 0 + for v in set(community_dict.values()): + old_to_new[v] = new_val + new_val += 1 + # remap + optimal_partition = {} + for k in community_dict: + optimal_partition[node_name[k]] = old_to_new[community_dict[k]] + + return optimal_partition From 98c008cbb6424d096ead207f6d769d546cd3fa5c Mon Sep 17 00:00:00 2001 From: Kevin Brown Date: Mon, 29 Apr 2019 10:37:23 -0700 Subject: [PATCH 2/2] Minor addition to changelog. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 5e5f726..de8b5d4 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,7 @@ Specifically, ``big_10_football_directed.gml`` is compiled by myself to test com Change log: - 04-29-2019 (Kevin S. Brown) - Pushed changes that allow working with weighted graphs (either directed or undirected). + Edge weights are now supported, for both directed and undirected graphs. - 02-23-2018 Test on Python 3