Commit 299328a

Merge pull request #94 from kirayummy/master: deeper_gcn

2 parents d016a9d + 60db1b4

File tree

7 files changed: +542 −3 lines changed

examples/deeper_gcn/README.md

+33
@@ -0,0 +1,33 @@
# DeeperGCN: All You Need to Train Deeper GCNs

See the paper for more information: https://arxiv.org/pdf/2006.07739.pdf

### Datasets

The datasets contain three citation networks: CORA, PUBMED, CITESEER. The details for these three datasets can be found in the [paper](https://arxiv.org/abs/1609.02907).

### Dependencies

- paddlepaddle>=1.6
- pgl

### Performance

We train our models for 200 epochs and report the accuracy on the test dataset.

| Dataset | Accuracy |
| --- | --- |
| Cora | ~77% |

### How to run

For example, to train DeeperGCN on the cora dataset using a GPU:

```
python train.py --dataset cora --use_cuda
```

#### Hyperparameters

- dataset: The citation dataset: "cora", "citeseer", or "pubmed".
- use_cuda: Train on GPU if --use_cuda is specified.
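The same entry point runs on CPU when `--use_cuda` is omitted; for example, to train on pubmed:

```
python train.py --dataset pubmed
```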

examples/deeper_gcn/model.py

+89
@@ -0,0 +1,89 @@
import pgl
import paddle.fluid as fluid


def DeeperGCN(gw, feature, num_layers,
              hidden_size, num_tasks, name, dropout_prob):
    """Implementation of DeeperGCN, see the paper
    "DeeperGCN: All You Need to Train Deeper GCNs" in
    https://arxiv.org/pdf/2006.07739.pdf

    Args:
        gw: Graph wrapper object

        feature: A tensor with shape (num_nodes, feature_size)

        num_layers: number of layers in DeeperGCN

        hidden_size: hidden size in DeeperGCN

        num_tasks: number of output tasks (the final prediction size)

        name: prefix for the deeper gcn layer names

        dropout_prob: dropout probability in DeeperGCN

    Return:
        A tensor with shape (num_nodes, num_tasks)
    """

    beta = "dynamic"
    # Project the input features to the hidden size.
    feature = fluid.layers.fc(feature,
                              hidden_size,
                              bias_attr=False,
                              param_attr=fluid.ParamAttr(name=name + '_weight'))

    output = pgl.layers.gen_conv(gw, feature, name=name + "_gen_conv_0", beta=beta)

    for layer in range(num_layers):
        # Pre-activation residual block: LN/BN -> ReLU -> GraphConv -> Res
        old_output = output
        # 1. Layer norm
        output = fluid.layers.layer_norm(
            output,
            begin_norm_axis=1,
            param_attr=fluid.ParamAttr(
                name="norm_scale_%s_%d" % (name, layer),
                initializer=fluid.initializer.Constant(1.0)),
            bias_attr=fluid.ParamAttr(
                name="norm_bias_%s_%d" % (name, layer),
                initializer=fluid.initializer.Constant(0.0)))

        # 2. ReLU
        output = fluid.layers.relu(output)

        # 3. Dropout
        output = fluid.layers.dropout(
            output,
            dropout_prob=dropout_prob,
            dropout_implementation="upscale_in_train")

        # 4. gen_conv (numbered from 1 so the parameters are not shared
        #    with the initial "_gen_conv_0" layer above)
        output = pgl.layers.gen_conv(
            gw, output, name=name + "_gen_conv_%d" % (layer + 1), beta=beta)

        # 5. Residual connection
        output = output + old_output

    # Final block: LN + ReLU + dropout
    output = fluid.layers.layer_norm(
        output,
        begin_norm_axis=1,
        param_attr=fluid.ParamAttr(
            name="norm_scale_%s_%d" % (name, num_layers),
            initializer=fluid.initializer.Constant(1.0)),
        bias_attr=fluid.ParamAttr(
            name="norm_bias_%s_%d" % (name, num_layers),
            initializer=fluid.initializer.Constant(0.0)))
    output = fluid.layers.relu(output)
    output = fluid.layers.dropout(
        output,
        dropout_prob=dropout_prob,
        dropout_implementation="upscale_in_train")

    # Final prediction
    output = fluid.layers.fc(output,
                             num_tasks,
                             bias_attr=False,
                             param_attr=fluid.ParamAttr(name=name + '_final_weight'))

    return output
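For orientation, here is a minimal sketch of wiring `DeeperGCN` into a fluid program. It mirrors what `train.py` below does; the Cora dataset and the "words" node feature are assumptions taken from that script:

```python
import pgl
import paddle.fluid as fluid
from pgl import data_loader
from model import DeeperGCN

# A minimal sketch, assuming the Cora dataset with a "words" node feature
# (both taken from train.py in this same commit).
dataset = data_loader.CoraDataset()
train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
    gw = pgl.graph_wrapper.GraphWrapper(
        name="graph", node_feat=dataset.graph.node_feat_info())
    # 7 residual GENConv blocks, 64 hidden units, dropout 0.1.
    logits = DeeperGCN(gw, gw.node_feat["words"], 7,
                       64, dataset.num_classes, "deepergcn", 0.1)
```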

examples/deeper_gcn/train.py

+155
@@ -0,0 +1,155 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-*- coding: utf-8 -*-
import pgl
from pgl import data_loader
from pgl.utils.logger import log
import paddle.fluid as fluid
import numpy as np
import time
import argparse
from pgl.utils.log_writer import LogWriter  # vdl (VisualDL)
from model import DeeperGCN


def load(name):
    if name == 'cora':
        dataset = data_loader.CoraDataset()
    elif name == "pubmed":
        dataset = data_loader.CitationDataset("pubmed", symmetry_edges=False)
    elif name == "citeseer":
        dataset = data_loader.CitationDataset("citeseer", symmetry_edges=False)
    else:
        raise ValueError(name + " dataset doesn't exist")
    return dataset


def main(args):
    # VisualDL writer for training curves
    writer = LogWriter("checkpoints/train_history")

    dataset = load(args.dataset)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    test_program = fluid.Program()
    hidden_size = 64
    num_layers = 7

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            name="graph",
            node_feat=dataset.graph.node_feat_info())

        output = DeeperGCN(gw,
                           gw.node_feat["words"],
                           num_layers,
                           hidden_size,
                           dataset.num_classes,
                           "deepergcn",
                           0.1)

        node_index = fluid.layers.data(
            "node_index",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)
        node_label = fluid.layers.data(
            "node_label",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)

        pred = fluid.layers.gather(output, node_index)
        loss, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=node_label, return_softmax=True)
        acc = fluid.layers.accuracy(input=pred, label=node_label, k=1)
        loss = fluid.layers.mean(loss)

    test_program = train_program.clone(for_test=True)
    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005),
            learning_rate=0.005)
        adam.minimize(loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    feed_dict = gw.to_feed(dataset.graph)

    train_index = dataset.train_index
    train_label = np.expand_dims(dataset.y[train_index], -1)
    train_index = np.expand_dims(train_index, -1)

    val_index = dataset.val_index
    val_label = np.expand_dims(dataset.y[val_index], -1)
    val_index = np.expand_dims(val_index, -1)

    test_index = dataset.test_index
    test_label = np.expand_dims(dataset.y[test_index], -1)
    test_index = np.expand_dims(test_index, -1)

    # Collect the learnable beta parameters of the GENConv layers so
    # their values can be logged over the course of training.
    beta_param_list = []
    for param in fluid.io.get_program_parameter(train_program):
        if param.name.endswith("_beta"):
            beta_param_list.append(param)

    dur = []
    for epoch in range(200):
        if epoch >= 3:
            t0 = time.time()
        feed_dict["node_index"] = np.array(train_index, dtype="int64")
        feed_dict["node_label"] = np.array(train_label, dtype="int64")
        train_loss, train_acc = exe.run(train_program,
                                        feed=feed_dict,
                                        fetch_list=[loss, acc],
                                        return_numpy=True)
        for param in beta_param_list:
            beta = np.array(fluid.global_scope().find_var(param.name).get_tensor())
            writer.add_scalar("beta/" + param.name, beta, epoch)

        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)

        feed_dict["node_index"] = np.array(val_index, dtype="int64")
        feed_dict["node_label"] = np.array(val_label, dtype="int64")
        val_loss, val_acc = exe.run(test_program,
                                    feed=feed_dict,
                                    fetch_list=[loss, acc],
                                    return_numpy=True)

        log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur) +
                 "Train Loss: %f " % train_loss + "Train Acc: %f " % train_acc
                 + "Val Loss: %f " % val_loss + "Val Acc: %f " % val_acc)

    feed_dict["node_index"] = np.array(test_index, dtype="int64")
    feed_dict["node_label"] = np.array(test_label, dtype="int64")
    test_loss, test_acc = exe.run(test_program,
                                  feed=feed_dict,
                                  fetch_list=[loss, acc],
                                  return_numpy=True)
    log.info("Accuracy: %f" % test_acc)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='DeeperGCN')
    parser.add_argument(
        "--dataset", type=str, default="cora",
        help="dataset (cora, citeseer, pubmed)")
    parser.add_argument("--use_cuda", action='store_true', help="use_cuda")
    args = parser.parse_args()
    log.info(args)
    main(args)
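One detail worth calling out: the script computes predictions for every node in one pass, then uses `fluid.layers.gather` with the split indices to select the rows for the current split. In numpy terms the selection is plain fancy indexing (the shapes below assume Cora: 2708 nodes, 7 classes):

```python
import numpy as np

logits = np.random.rand(2708, 7)   # per-node predictions for the whole graph
train_index = np.array([0, 5, 9])  # node ids in the training split
pred = logits[train_index]         # what fluid.layers.gather(output, node_index) selects
assert pred.shape == (3, 7)
```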

pgl/__init__.py

+1
@@ -21,3 +21,4 @@
 from pgl import heter_graph
 from pgl import heter_graph_wrapper
 from pgl import contrib
+from pgl import message_passing

pgl/layers/conv.py

+54 −2
@@ -15,10 +15,10 @@
 graph neural networks.
 """
 import paddle.fluid as fluid
-from pgl import graph_wrapper
 from pgl.utils import paddle_helper
+from pgl import message_passing
 
-__all__ = ['gcn', 'gat', 'gin', 'gaan']
+__all__ = ['gcn', 'gat', 'gin', 'gaan', 'gen_conv']
 
 
 def gcn(gw, feature, hidden_size, activation, name, norm=None):
@@ -352,3 +352,55 @@ def recv_func(message):
     output = fluid.layers.dropout(output, dropout_prob=0.1)
 
     return output
+
+
+def gen_conv(gw,
+             feature,
+             name,
+             beta=None):
+    """Implementation of GENeralized Graph Convolution (GENConv), see the paper
+    "DeeperGCN: All You Need to Train Deeper GCNs" in
+    https://arxiv.org/pdf/2006.07739.pdf
+
+    Args:
+        gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)
+
+        feature: A tensor with shape (num_nodes, feature_size).
+
+        name: deeper gcn layer names.
+
+        beta: [0, +infinity] or "dynamic" or None
+
+    Return:
+        A tensor with shape (num_nodes, feature_size)
+    """
+
+    if beta == "dynamic":
+        # Learnable inverse-temperature for the softmax aggregator.
+        beta = fluid.layers.create_parameter(
+            shape=[1],
+            dtype='float32',
+            default_initializer=
+            fluid.initializer.ConstantInitializer(value=1.0),
+            name=name + '_beta')
+
+    # Message passing: copy node features along the edges, then
+    # aggregate the messages with a beta-scaled softmax.
+    msg = gw.send(message_passing.copy_send, nfeat_list=[("h", feature)])
+    output = gw.recv(msg, message_passing.softmax_agg(beta))
+
+    # Message norm, followed by a residual connection.
+    output = message_passing.msg_norm(feature, output, name)
+    output = feature + output
+
+    # Two-layer MLP update function.
+    output = fluid.layers.fc(output,
+                             feature.shape[-1],
+                             bias_attr=False,
+                             act="relu",
+                             param_attr=fluid.ParamAttr(name=name + '_weight1'))
+
+    output = fluid.layers.fc(output,
+                             feature.shape[-1],
+                             bias_attr=False,
+                             param_attr=fluid.ParamAttr(name=name + '_weight2'))
+
+    return output
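`gen_conv` leans on three helpers from the new `pgl/message_passing.py` module, which is among the 7 changed files but is not shown in this excerpt. As a rough, non-authoritative sketch of what they compute — the bodies below are assumptions reconstructed from the paper's SoftMax_Agg and MsgNorm definitions, not the module's actual code:

```python
import paddle.fluid as fluid
from pgl.utils import paddle_helper


def copy_send(src_feat, dst_feat, edge_feat):
    # Send the source node's feature "h" along each edge unchanged.
    return src_feat["h"]


def softmax_agg(beta):
    # SoftMax_Agg: weight each node's incoming messages by a softmax
    # over its neighborhood, scaled by the (possibly learnable) beta.
    def softmax_agg_inside(msg):
        alpha = paddle_helper.sequence_softmax(msg * beta)
        return fluid.layers.sequence_pool(msg * alpha, "sum")
    return softmax_agg_inside


def msg_norm(x, msg, name):
    # MsgNorm: rescale the l2-normalized aggregated message by ||x||_2
    # and a learnable scalar s before the residual addition.
    s = fluid.layers.create_parameter(
        shape=[1],
        dtype='float32',
        name=name + '_msg_norm_s',
        default_initializer=fluid.initializer.ConstantInitializer(value=1.0))
    msg = fluid.layers.l2_normalize(msg, axis=1)
    x_norm = fluid.layers.sqrt(
        fluid.layers.reduce_sum(x * x, dim=1, keep_dim=True))
    return msg * x_norm * s
```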
