
Commit 584bb25

Merge remote-tracking branch 'upstream/main' into main
2 parents: 996f7a9 + ee61e01

20 files changed: +1367 -75 lines

examples/citation_benchmark/README.md

Lines changed: 62 additions & 0 deletions
# Easy Paper Reproduction for Citation Networks (Cora/Pubmed/Citeseer)

This page reproduces popular **Graph Neural Network** papers on the citation networks (Cora/Pubmed/Citeseer), the **hello world** datasets (**small** and **fast**) of graph neural networks. Even on these small datasets, it is hard to achieve very high performance.

All experiments use the public split under the **semi-supervised** setting, and we report the average accuracy over 10 runs.

# Experiment Results

| Model | Cora | Pubmed | Citeseer | Remarks |
| --- | --- | --- | --- | --- |
| [Vanilla GCN (Kipf 2017)](https://openreview.net/pdf?id=SJU4ayYgl) | 0.807 (0.010) | 0.794 (0.003) | 0.710 (0.007) | |
| [GAT (Veličković 2017)](https://arxiv.org/pdf/1710.10903.pdf) | 0.834 (0.004) | 0.772 (0.004) | 0.700 (0.006) | |
| [SGC (Wu 2019)](https://arxiv.org/pdf/1902.07153.pdf) | 0.818 (0.000) | 0.782 (0.000) | 0.708 (0.000) | |
| [APPNP (Klicpera 2018)](https://arxiv.org/abs/1810.05997) | 0.846 (0.003) | 0.803 (0.002) | 0.719 (0.003) | Almost the same as the results reported in Appendix E of the paper. |
| [GCNII (64 layers, 1500 epochs, Chen 2020)](https://arxiv.org/pdf/2007.02133.pdf) | 0.846 (0.003) | 0.798 (0.003) | 0.724 (0.006) | |

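Each cell reads as mean accuracy with, presumably, the standard deviation over the 10 runs in parentheses. A minimal sketch (not part of the repository) of how such a summary can be computed once the per-run accuracies are collected:

```python
import numpy as np

# Hypothetical per-run test accuracies from 10 runs of train.py.
run_accuracies = [0.806, 0.809, 0.805, 0.812, 0.798,
                  0.810, 0.807, 0.803, 0.811, 0.809]

mean = np.mean(run_accuracies)
std = np.std(run_accuracies)
print("%.3f(%.3f)" % (mean, std))  # prints 0.807(0.004), the format used in the table
```
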
# How to Run the Experiments

```shell
# Choose the GPU device
export CUDA_VISIBLE_DEVICES=0

# GCN
python train.py --conf config/gcn.yaml --use_cuda --dataset cora
python train.py --conf config/gcn.yaml --use_cuda --dataset pubmed
python train.py --conf config/gcn.yaml --use_cuda --dataset citeseer

# GAT
python train.py --conf config/gat.yaml --use_cuda --dataset cora
python train.py --conf config/gat.yaml --use_cuda --dataset pubmed
python train.py --conf config/gat.yaml --use_cuda --dataset citeseer

# SGC (slow version)
python train.py --conf config/sgc.yaml --use_cuda --dataset cora
python train.py --conf config/sgc.yaml --use_cuda --dataset pubmed
python train.py --conf config/sgc.yaml --use_cuda --dataset citeseer

# APPNP
python train.py --conf config/appnp.yaml --use_cuda --dataset cora
python train.py --conf config/appnp.yaml --use_cuda --dataset pubmed
python train.py --conf config/appnp.yaml --use_cuda --dataset citeseer

# GCNII (the original implementation uses 1500 epochs)
python train.py --conf config/gcnii.yaml --use_cuda --dataset cora --epoch 1500
python train.py --conf config/gcnii.yaml --use_cuda --dataset pubmed --epoch 1500
python train.py --conf config/gcnii.yaml --use_cuda --dataset citeseer --epoch 1500
```
Lines changed: 43 additions & 0 deletions
import pgl
import model
from pgl import data_loader
import paddle.fluid as fluid
import numpy as np
import time


def build_model(dataset, config, phase, main_prog):
    # Graph wrapper that declares the input graph tensors.
    gw = pgl.graph_wrapper.GraphWrapper(
        name="graph",
        node_feat=dataset.graph.node_feat_info())

    # Instantiate the model named in the config (GCN / GAT / APPNP / SGC / GCNII).
    GraphModel = getattr(model, config.model_name)
    m = GraphModel(config=config, num_class=dataset.num_classes)
    logits = m.forward(gw, gw.node_feat["words"], phase)

    # Gather predictions and labels for the nodes of the current split.
    node_index = fluid.layers.data(
        "node_index",
        shape=[None, 1],
        dtype="int64",
        append_batch_size=False)
    node_label = fluid.layers.data(
        "node_label",
        shape=[None, 1],
        dtype="int64",
        append_batch_size=False)

    pred = fluid.layers.gather(logits, node_index)
    loss, pred = fluid.layers.softmax_with_cross_entropy(
        logits=pred, label=node_label, return_softmax=True)
    acc = fluid.layers.accuracy(input=pred, label=node_label, k=1)
    loss = fluid.layers.mean(loss)

    if phase == "train":
        adam = fluid.optimizer.Adam(
            learning_rate=config.learning_rate,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=config.weight_decay))
        adam.minimize(loss)
    return gw, loss, acc
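Not part of this commit, but for context, a minimal sketch of how `build_model` might be driven from a Fluid program. The `dataset`, `config`, `train_index`, and `train_label` objects are assumed placeholders (int64 arrays of shape `[num_train_nodes, 1]` for the last two), not names from the repository's `train.py`:

```python
import paddle.fluid as fluid

train_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    gw, loss, acc = build_model(dataset, config, phase="train", main_prog=train_prog)

exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(startup_prog)

# The GraphWrapper converts the in-memory graph into the tensors it declared.
feed_dict = gw.to_feed(dataset.graph)
feed_dict["node_index"] = train_index
feed_dict["node_label"] = train_label

for epoch in range(200):
    train_loss, train_acc = exe.run(
        train_prog, feed=feed_dict, fetch_list=[loss, acc])
    print(epoch, train_loss[0], train_acc[0])
```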
Lines changed: 9 additions & 0 deletions
model_name: APPNP
k_hop: 10
alpha: 0.1
num_layers: 1
learning_rate: 0.01
dropout: 0.5
hidden_size: 64
weight_decay: 0.0005
edge_dropout: 0.0
Lines changed: 9 additions & 0 deletions
model_name: GAT
learning_rate: 0.005
weight_decay: 0.0005
num_layers: 1
feat_drop: 0.6
attn_drop: 0.6
num_heads: 8
hidden_size: 8
edge_dropout: 0.0
Lines changed: 7 additions & 0 deletions
model_name: GCN
num_layers: 1
dropout: 0.5
hidden_size: 16
learning_rate: 0.01
weight_decay: 0.0005
edge_dropout: 0.0
Lines changed: 9 additions & 0 deletions
model_name: GCNII
k_hop: 64
alpha: 0.1
num_layers: 1
learning_rate: 0.01
dropout: 0.6
hidden_size: 64
weight_decay: 0.0005
edge_dropout: 0.0
Lines changed: 5 additions & 0 deletions
model_name: SGC
num_layers: 2
learning_rate: 0.2
weight_decay: 0.000005
feature_pre_normalize: False
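The models read these values both as attributes (`config.model_name`, `config.learning_rate`) and via `config.get(...)`, so `train.py` presumably wraps the parsed YAML in a dict-like object with attribute access. A sketch of one way to do that, assuming `easydict` (an assumption, not necessarily what the repository uses):

```python
import yaml
from easydict import EasyDict  # assumption: any dict wrapper with attribute access works


def load_config(path):
    """Load a YAML config such as config/gcn.yaml into an attribute-accessible dict."""
    with open(path) as f:
        return EasyDict(yaml.safe_load(f))


config = load_config("config/gcn.yaml")
print(config.model_name, config.get("hidden_size", 64))  # -> GCN 16
```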

examples/citation_benchmark/model.py

Lines changed: 195 additions & 0 deletions
import pgl
import paddle.fluid.layers as L
import pgl.layers.conv as conv


def get_norm(indegree):
    """Compute the symmetric normalization term D^{-1/2} from node in-degrees."""
    float_degree = L.cast(indegree, dtype="float32")
    float_degree = L.clamp(float_degree, min=1.0)
    norm = L.pow(float_degree, factor=-0.5)
    return norm


class GCN(object):
    """Implement of GCN"""

    def __init__(self, config, num_class):
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 1)
        self.hidden_size = config.get("hidden_size", 64)
        self.dropout = config.get("dropout", 0.5)
        self.edge_dropout = config.get("edge_dropout", 0.0)

    def forward(self, graph_wrapper, feature, phase):
        for i in range(self.num_layers):
            if phase == "train":
                ngw = pgl.sample.edge_drop(graph_wrapper, self.edge_dropout)
                norm = get_norm(ngw.indegree())
            else:
                ngw = graph_wrapper
                norm = graph_wrapper.node_feat["norm"]

            feature = pgl.layers.gcn(ngw,
                feature,
                self.hidden_size,
                activation="relu",
                norm=norm,
                name="layer_%s" % i)

            feature = L.dropout(
                feature,
                self.dropout,
                dropout_implementation='upscale_in_train')

        if phase == "train":
            ngw = pgl.sample.edge_drop(graph_wrapper, self.edge_dropout)
            norm = get_norm(ngw.indegree())
        else:
            ngw = graph_wrapper
            norm = graph_wrapper.node_feat["norm"]

        feature = conv.gcn(ngw,
            feature,
            self.num_class,
            activation=None,
            norm=norm,
            name="output")

        return feature


class GAT(object):
    """Implement of GAT"""

    def __init__(self, config, num_class):
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 1)
        self.num_heads = config.get("num_heads", 8)
        self.hidden_size = config.get("hidden_size", 8)
        self.feat_dropout = config.get("feat_drop", 0.6)
        self.attn_dropout = config.get("attn_drop", 0.6)
        self.edge_dropout = config.get("edge_dropout", 0.0)

    def forward(self, graph_wrapper, feature, phase):
        # Edge dropout is only applied during training.
        if phase == "train":
            edge_dropout = self.edge_dropout
        else:
            edge_dropout = 0

        for i in range(self.num_layers):
            ngw = pgl.sample.edge_drop(graph_wrapper, edge_dropout)

            feature = conv.gat(ngw,
                feature,
                self.hidden_size,
                activation="elu",
                name="gat_layer_%s" % i,
                num_heads=self.num_heads,
                feat_drop=self.feat_dropout,
                attn_drop=self.attn_dropout)

        ngw = pgl.sample.edge_drop(graph_wrapper, edge_dropout)
        feature = conv.gat(ngw,
            feature,
            self.num_class,
            num_heads=1,
            activation=None,
            feat_drop=self.feat_dropout,
            attn_drop=self.attn_dropout,
            name="output")
        return feature


class APPNP(object):
    """Implement of APPNP"""

    def __init__(self, config, num_class):
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 1)
        self.hidden_size = config.get("hidden_size", 64)
        self.dropout = config.get("dropout", 0.5)
        self.alpha = config.get("alpha", 0.1)
        self.k_hop = config.get("k_hop", 10)
        self.edge_dropout = config.get("edge_dropout", 0.0)

    def forward(self, graph_wrapper, feature, phase):
        # Edge dropout is only applied during training.
        if phase == "train":
            edge_dropout = self.edge_dropout
        else:
            edge_dropout = 0

        # MLP ("predict" step) ...
        for i in range(self.num_layers):
            feature = L.dropout(
                feature,
                self.dropout,
                dropout_implementation='upscale_in_train')
            feature = L.fc(feature, self.hidden_size, act="relu", name="lin%s" % i)

        feature = L.dropout(
            feature,
            self.dropout,
            dropout_implementation='upscale_in_train')

        feature = L.fc(feature, self.num_class, act=None, name="output")

        # ... followed by k_hop steps of personalized-PageRank propagation.
        feature = conv.appnp(graph_wrapper,
            feature=feature,
            edge_dropout=edge_dropout,
            alpha=self.alpha,
            k_hop=self.k_hop)
        return feature


class SGC(object):
    """Implement of SGC"""

    def __init__(self, config, num_class):
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 1)

    def forward(self, graph_wrapper, feature, phase):
        # K-step feature propagation (APPNP with alpha=0) followed by a linear classifier.
        feature = conv.appnp(graph_wrapper,
            feature=feature,
            edge_dropout=0,
            alpha=0,
            k_hop=self.num_layers)
        # The propagated features are fixed; only the linear layer is trained.
        feature.stop_gradient = True
        feature = L.fc(feature, self.num_class, act=None, bias_attr=False, name="output")
        return feature


class GCNII(object):
    """Implement of GCNII"""

    def __init__(self, config, num_class):
        self.num_class = num_class
        self.num_layers = config.get("num_layers", 1)
        self.hidden_size = config.get("hidden_size", 64)
        self.dropout = config.get("dropout", 0.6)
        self.alpha = config.get("alpha", 0.1)
        self.lambda_l = config.get("lambda_l", 0.5)
        self.k_hop = config.get("k_hop", 64)
        self.edge_dropout = config.get("edge_dropout", 0.0)

    def forward(self, graph_wrapper, feature, phase):
        # Edge dropout is only applied during training.
        if phase == "train":
            edge_dropout = self.edge_dropout
        else:
            edge_dropout = 0

        for i in range(self.num_layers):
            feature = L.fc(feature, self.hidden_size, act="relu", name="lin%s" % i)
            feature = L.dropout(
                feature,
                self.dropout,
                dropout_implementation='upscale_in_train')

        feature = conv.gcnii(graph_wrapper,
            feature=feature,
            name="gcnii",
            activation="relu",
            lambda_l=self.lambda_l,
            alpha=self.alpha,
            dropout=self.dropout,
            k_hop=self.k_hop)

        feature = L.fc(feature, self.num_class, act=None, name="output")
        return feature
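For reference, the propagation that `conv.appnp` is built around (judging from its `alpha` and `k_hop` arguments) is the personalized-PageRank scheme from the APPNP paper, applied after the MLP in the `APPNP` class above:

$$
H^{(0)} = f_\theta(X), \qquad
H^{(k+1)} = (1-\alpha)\,\hat{A}\,H^{(k)} + \alpha\,H^{(0)}, \qquad
\hat{A} = \tilde{D}^{-1/2}\tilde{A}\tilde{D}^{-1/2}
$$

With $\alpha = 0$ the iteration collapses to $H^{(K)} = \hat{A}^{K} X$, which is exactly the fixed propagation the `SGC` class computes (under `stop_gradient`) before its single linear layer.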
