zhiyzuo · thelahunginjeet · Apr 29, 2019 · Apr 29, 2019 · zhiyzuo · May 4, 2019
diff --git a/LICENSE b/LICENSE
@@ -1,7 +1,15 @@
-Copyright 2018 Zhiya Zuo
+Copyright 2018 Zhiya Zuo, Kevin S. Brown
 
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included in all copies or substantial portions
+of the Software.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
+LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
+EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.rst b/README.rst
@@ -14,9 +14,12 @@ Specifically, ``big_10_football_directed.gml`` is compiled by myself to test com
 
 Change log:
 
+- 04-29-2019 (Kevin S. Brown)
+  Edge weights are now supported, for both directed and undirected graphs.
+
 - 02-23-2018
   Test on Python 3
 
 - 10-20-2017
   Updated python codes to use NetworkX 2 APIs. See https://networkx.github.io/documentation/stable/release/release_2.0.html.
-  Later in the day, I added a wrapper function to retrieve the largest eigenvalue and vector for 2x2 matrices since scipy.sparse.linalg.eigs do not work in that case. 
+  Later in the day, I added a wrapper function to retrieve the largest eigenvalue and vector for 2x2 matrices since scipy.sparse.linalg.eigs does not work in that case.
diff --git a/modularity_maximization/__init__.py b/modularity_maximization/__init__.py
@@ -1,19 +1,18 @@
 # -*- coding: utf-8 -*-
 
-from modularity_maximization.community_newman import partition
-from modularity_maximization.utils import get_modularity
+from modularity_maximization.spectralopt import partition
+from modularity_maximization.modularity import modularity
 
-__version__ = '0.0.1rc4'
-__author__ = """Zhiya Zuo"""
-__email__ = """zhiyazuo@gmail.com"""
+__version__ = '0.7.0'
+__author__ = """Zhiya Zuo and Kevin S. Brown"""
+__email__ = """zhiyazuo@gmail.com and kevin.brown@oregonstate.edu"""
 
 __license__ = """
-Copyright 2018 Zhiya Zuo
+Copyright 2018 Zhiya Zuo and Kevin S. Brown
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 """
-
diff --git a/modularity_maximization/_divide.py b/modularity_maximization/_divide.py
@@ -3,7 +3,7 @@
 import numpy as np
 import networkx as nx
 from scipy import sparse
-from modularity_maximization import utils
+from modularity_maximization import modularity
 
 def _divide(network, community_dict, comm_index, B, refine=False):
     '''
@@ -25,11 +25,11 @@ def _divide(network, community_dict, comm_index, B, refine=False):
 
     comm_nodes = tuple(u for u in community_dict \
                   if community_dict[u] == comm_index)
-    B_hat_g = utils.get_mod_matrix(network, comm_nodes, B)
+    B_hat_g = modularity.get_mod_matrix(network, comm_nodes, B)
 
     # compute the top eigenvector u₁ and β₁
     if B_hat_g.shape[0] < 3:
-        beta_s, u_s = utils.largest_eig(B_hat_g)
+        beta_s, u_s = modularity.largest_eig(B_hat_g)
     else:
         beta_s, u_s = sparse.linalg.eigs(B_hat_g, k=1, which='LR')
     u_1 = u_s[:, 0]
@@ -39,7 +39,7 @@ def _divide(network, community_dict, comm_index, B, refine=False):
         s = sparse.csc_matrix(np.asmatrix([[1 if u_1_i > 0 else -1] for u_1_i in u_1]))
         if refine:
             improve_modularity(network, comm_nodes, s, B)
-        delta_modularity = utils._get_delta_Q(B_hat_g, s)
+        delta_modularity = modularity._get_delta_Q(B_hat_g, s)
         if delta_modularity > 0:
             g1_nodes = np.array([comm_nodes[i] \
                                  for i in range(u_1.shape[0]) \
@@ -53,6 +53,7 @@ def _divide(network, community_dict, comm_index, B, refine=False):
     # indivisble, return None
     return None, None
 
+
 def improve_modularity(network, comm_nodes, s, B):
     '''
     Fine tuning of the initial division from `_divide`
@@ -71,7 +72,7 @@ def improve_modularity(network, comm_nodes, s, B):
     '''
 
     # iterate until no increment of Q
-    B_hat_g = utils.get_mod_matrix(network, comm_nodes, B)
+    B_hat_g = modularity.get_mod_matrix(network, comm_nodes, B)
     while True:
         unmoved = list(comm_nodes)
         # node indices to be moved
@@ -81,12 +82,12 @@ def improve_modularity(network, comm_nodes, s, B):
         # keep moving until none left
         while len(unmoved) > 0:
             # init Q
-            Q0 = utils._get_delta_Q(B_hat_g, s)
+            Q0 = modularity._get_delta_Q(B_hat_g, s)
             scores = np.zeros(len(unmoved))
             for k_index in range(scores.size):
                 k = comm_nodes.index(unmoved[k_index])
                 s[k, 0] = -s[k, 0]
-                scores[k_index] = utils._get_delta_Q(B_hat_g, s) - Q0
+                scores[k_index] = modularity._get_delta_Q(B_hat_g, s) - Q0
                 s[k, 0] = -s[k, 0]
             _j = np.argmax(scores)
             j = comm_nodes.index(unmoved[_j])
@@ -112,6 +113,6 @@ def improve_modularity(network, comm_nodes, s, B):
             delta_modularity = 0
         else:
             delta_modularity = node_improvement[max_index]
-        # Stop if ΔQ <= 0 
+        # Stop if ΔQ <= 0
         if delta_modularity <= 0:
             break
diff --git a/modularity_maximization/modularity.py b/modularity_maximization/modularity.py
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import networkx as nx
+from scipy import sparse
+from scipy.linalg import eig
+from itertools import product
+
+
+def transform_net_and_part(network,partition):
+    '''
+    Relabel the nodes of `network` as the integers 0,...,N-1 (N being the
+    number of nodes) and re-key `partition` (keys are nodes, values are
+    community ID) to match, which lets you directly map edges to elements
+    of the modularity matrix.
+
+    Returns the relabeled network and the re-keyed partition.
+    '''
+    relabeled = nx.convert_node_labels_to_integers(
+        network, first_label=0, label_attribute="node_name")
+    # the original names live in the 'node_name' attribute; invert that map
+    label_of = {name: label for label, name
+                in nx.get_node_attributes(relabeled, 'node_name').items()}
+    # swap every partition key for its integer label
+    relabeled_partition = {label_of[node]: comm for node, comm in partition.items()}
+    return relabeled, relabeled_partition
+
+
+def reverse_partition(partition):
+    '''
+    Invert a graph partition of the form node:community_id into a
+    dictionary of the form community_id:[node_1,node_2,...].  Members are
+    listed in the order `partition` yields them.
+    '''
+    members = {}
+    for node, comm_id in partition.items():
+        # setdefault opens an empty member list the first time a
+        # community ID shows up
+        members.setdefault(comm_id, []).append(node)
+    return members
+
+
+def modularity(network, partition):
+    '''
+    Compute the modularity of `partition` (node -> community ID) on
+    `network`; works for Directed and Undirected Graphs, both unweighted
+    and weighted.
+    '''
+    # relabel the nodes 0..N-1 so they index the modularity matrix directly
+    network,partition = transform_net_and_part(network,partition)
+    # modularity matrix of the whole network
+    B = get_base_modularity_matrix(network)
+    directed = type(network) == nx.DiGraph
+    if not directed and type(network) != nx.Graph:
+        print('Invalid graph type')
+        raise TypeError
+    if nx.is_weighted(network):
+        # sum_{ij} A_{ij}; for an undirected graph this is 2*0.5*sum_{ij} A_{ij}
+        norm_fac = nx.to_scipy_sparse_matrix(network).sum()
+    elif directed:
+        # m, the number of edges
+        norm_fac = 1.*network.number_of_edges()
+    else:
+        # 2m, twice the number of edges
+        norm_fac = 2.*(network.number_of_edges())
+    # community ID -> list of member nodes
+    rev_part = reverse_partition(partition)
+    # sum the modularity-matrix entries over every ordered pair (i,j) of
+    # nodes that share a community (i == j included) ...
+    within = sum(B[i,j] for members in rev_part.values()
+                 for i,j in product(members,members))
+    # ... and normalize
+    return within/norm_fac
+
+
+def get_base_modularity_matrix(network):
+    '''
+    Obtain the modularity matrix for the whole network.  Edge weights are
+    read from the 'weight' edge attribute, and only when
+    `nx.is_weighted(network)` reports the network as weighted.
+
+    Parameters
+    ----------
+    network : nx.Graph or nx.DiGraph
+        The network of interest
+
+    Returns
+    -------
+    scipy.sparse.csc_matrix
+        The modularity matrix for `network`
+
+    Raises
+    ------
+    TypeError
+        When `network` is not exactly an nx.Graph or an nx.DiGraph
+    '''
+
+    # exact type comparison: subclasses of either class are rejected too
+    if type(network) == nx.Graph:
+        build = nx.modularity_matrix
+    elif type(network) == nx.DiGraph:
+        build = nx.directed_modularity_matrix
+    else:
+        raise TypeError('Graph type not supported. Use either nx.Graph or nx.DiGraph')
+    # weight=None is the default of both NetworkX builders
+    return sparse.csc_matrix(build(network, weight='weight' if nx.is_weighted(network) else None))
+
+
+def _get_delta_Q(X, a):
+    r'''
+    Calculate the delta modularity
+
+    .. math::
+        \Delta Q = s^T \cdot \hat{B}_g \cdot s
+
+    Parameters
+    ----------
+    X : sparse or dense 2-D matrix
+        B_hat_g
+    a : sparse or dense 2-D matrix
+        s, which is the membership vector (a single column)
+
+    Returns
+    -------
+    float
+        The corresponding :math:`\Delta Q`
+    '''
+
+    delta_Q = (a.T.dot(X)).dot(a)
+    return delta_Q[0,0]
+
+
+def get_mod_matrix(network, comm_nodes=None, B=None):
+    r'''
+    Compute the modularity matrix for a specific group in the network
+    (a.k.a. the generalized modularity matrix).
+
+    Specifically, for :math:`i, j \in g`,
+
+    .. math::
+        B^{(g)}_{ij} = B_{ij} - \delta_{ij} \cdot \frac{1}{2}
+                       \sum_{k \in g} \left( B_{ik} + B_{ki} \right)
+
+    where, following NetworkX and with :math:`m = |E|`,
+    :math:`B_{ij} = A_{ij} - \frac{k_i k_j}{2m}` (undirected) or
+    :math:`B_{ij} = A_{ij} - \frac{k_i^{out} k_j^{in}}{m}` (directed).
+    For a directed network the result is symmetrized by adding its transpose.
+    When `comm_nodes` is None the whole-network matrix is returned instead.
+
+    Parameters
+    ----------
+    network : nx.Graph or nx.DiGraph
+        The network of interest
+    comm_nodes : iterable (list, np.array, or tuple)
+        List of nodes that defines a community
+    B : scipy.sparse matrix or np.matrix
+        Modularity matrix of `network`; computed here when None
+
+    Returns
+    -------
+    scipy.sparse.csc_matrix
+        The modularity matrix of `comm_nodes` within `network`
+
+    Raises
+    ------
+    TypeError
+        When `network` is not exactly an nx.Graph or an nx.DiGraph
+    ValueError
+        When a node of `comm_nodes` is not in `network`
+    '''
+
+    if comm_nodes is None:
+        return get_base_modularity_matrix(network)
+    directed = type(network) == nx.DiGraph
+    if not directed and type(network) != nx.Graph:
+        raise TypeError('Graph type not supported. Use either nx.Graph or nx.DiGraph')
+    if B is None:
+        B = get_base_modularity_matrix(network)
+    # node -> position in iteration order of `network` (assumed to be B's row/column order)
+    position = {u: idx for idx, u in enumerate(network)}
+    try:
+        indices = [position[u] for u in comm_nodes]
+    except KeyError as err:
+        raise ValueError('%r is not in network' % (err.args[0],))
+    # subset of mod matrix in g
+    B_g = B[indices, :][:, indices]
+    # ∑_(k∈g) B_ik and ∑_(k∈g) B_ki; the row sums are reused for undirected networks
+    B_g_rowsum = np.asarray(B_g.sum(axis=1))[:, 0]
+    B_g_colsum = np.asarray(B_g.sum(axis=0))[0, :] if directed else B_g_rowsum
+
+    # B^g_(i,j) = B_ij - δ_ij * ½ * ∑_(k∈g) (B_ik + B_ki),  i, j ∈ g
+    B_hat_g = np.array(B_g.toarray() if sparse.issparse(B_g) else B_g, dtype=float)
+    B_hat_g -= np.diag(0.5 * (B_g_rowsum + B_g_colsum))
+    if directed:
+        B_hat_g = B_hat_g + B_hat_g.T
+
+    return sparse.csc_matrix(B_hat_g)
+
+
+def largest_eig(A):
+    '''
+        A wrapper over `scipy.linalg.eig` to produce the largest real
+        eigval, shape (1,), and its eigvector as a column, shape (n, 1),
+        for small A; raises ValueError when no eigval is real
+    '''
+    vals, vectors = eig(A.todense())
+    # only real eigenvalues are candidates; eigenvectors are the COLUMNS of `vectors`
+    real_indices = [idx for idx, val in enumerate(vals) if not bool(val.imag)]
+    max_idx = max(real_indices, key=lambda idx: vals[idx].real)
+    return np.asarray([vals[max_idx].real]), np.asarray([vectors[:, max_idx].real]).T