PaddlePaddle
diff --git a/‎README.md
+19-1 b/‎README.md
+19-1
diff --git a/‎README.zh.md
+16 b/‎README.zh.md
+16
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/README.md renamed to ‎examples/kddcup2021/PCQM4M/README.md
+4-1 b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/README.md renamed to ‎examples/kddcup2021/PCQM4M/README.md
+4-1
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/SuperHelix_PCQM4M.pdf renamed to ‎examples/kddcup2021/PCQM4M/SuperHelix_PCQM4M.pdf b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/SuperHelix_PCQM4M.pdf renamed to ‎examples/kddcup2021/PCQM4M/SuperHelix_PCQM4M.pdf
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/ensemble/ensemble.py renamed to ‎examples/kddcup2021/PCQM4M/ensemble/ensemble.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/ensemble/ensemble.py renamed to ‎examples/kddcup2021/PCQM4M/ensemble/ensemble.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/ensemble/y_pred_pcqm4m.npz renamed to ‎examples/kddcup2021/PCQM4M/ensemble/y_pred_pcqm4m.npz b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/ensemble/y_pred_pcqm4m.npz renamed to ‎examples/kddcup2021/PCQM4M/ensemble/y_pred_pcqm4m.npz
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/__init__.py renamed to ‎examples/kddcup2021/PCQM4M/features/__init__.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/__init__.py renamed to ‎examples/kddcup2021/PCQM4M/features/__init__.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/extended_feature.py renamed to ‎examples/kddcup2021/PCQM4M/features/extended_feature.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/extended_feature.py renamed to ‎examples/kddcup2021/PCQM4M/features/extended_feature.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/local_feature.py renamed to ‎examples/kddcup2021/PCQM4M/features/local_feature.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/local_feature.py renamed to ‎examples/kddcup2021/PCQM4M/features/local_feature.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/mol_tree.py renamed to ‎examples/kddcup2021/PCQM4M/features/mol_tree.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/mol_tree.py renamed to ‎examples/kddcup2021/PCQM4M/features/mol_tree.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/obabel_utils.py renamed to ‎examples/kddcup2021/PCQM4M/features/obabel_utils.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/features/obabel_utils.py renamed to ‎examples/kddcup2021/PCQM4M/features/obabel_utils.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/__init__.py renamed to ‎examples/kddcup2021/PCQM4M/models/__init__.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/__init__.py renamed to ‎examples/kddcup2021/PCQM4M/models/__init__.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/conv.py renamed to ‎examples/kddcup2021/PCQM4M/models/conv.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/conv.py renamed to ‎examples/kddcup2021/PCQM4M/models/conv.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/layers.py renamed to ‎examples/kddcup2021/PCQM4M/models/layers.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/layers.py renamed to ‎examples/kddcup2021/PCQM4M/models/layers.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/mol_encoder.py renamed to ‎examples/kddcup2021/PCQM4M/models/mol_encoder.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/models/mol_encoder.py renamed to ‎examples/kddcup2021/PCQM4M/models/mol_encoder.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/config.yaml renamed to ‎examples/kddcup2021/PCQM4M/src/config.yaml b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/config.yaml renamed to ‎examples/kddcup2021/PCQM4M/src/config.yaml
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/cross_run.sh renamed to ‎examples/kddcup2021/PCQM4M/src/cross_run.sh b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/cross_run.sh renamed to ‎examples/kddcup2021/PCQM4M/src/cross_run.sh
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/dataset.py renamed to ‎examples/kddcup2021/PCQM4M/src/dataset.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/dataset.py renamed to ‎examples/kddcup2021/PCQM4M/src/dataset.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/fleet_run.sh renamed to ‎examples/kddcup2021/PCQM4M/src/fleet_run.sh b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/fleet_run.sh renamed to ‎examples/kddcup2021/PCQM4M/src/fleet_run.sh
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/main.py renamed to ‎examples/kddcup2021/PCQM4M/src/main.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/main.py renamed to ‎examples/kddcup2021/PCQM4M/src/main.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/main.sh renamed to ‎examples/kddcup2021/PCQM4M/src/main.sh b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/main.sh renamed to ‎examples/kddcup2021/PCQM4M/src/main.sh
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/model.py renamed to ‎examples/kddcup2021/PCQM4M/src/model.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/model.py renamed to ‎examples/kddcup2021/PCQM4M/src/model.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/test.py renamed to ‎examples/kddcup2021/PCQM4M/src/test.py
+7-46 b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/src/test.py renamed to ‎examples/kddcup2021/PCQM4M/src/test.py
+7-46
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/__init__.py renamed to ‎examples/kddcup2021/PCQM4M/utils/__init__.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/__init__.py renamed to ‎examples/kddcup2021/PCQM4M/utils/__init__.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/config.py renamed to ‎examples/kddcup2021/PCQM4M/utils/config.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/config.py renamed to ‎examples/kddcup2021/PCQM4M/utils/config.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/logger.py renamed to ‎examples/kddcup2021/PCQM4M/utils/logger.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/logger.py renamed to ‎examples/kddcup2021/PCQM4M/utils/logger.py
diff --git a/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/util.py renamed to ‎examples/kddcup2021/PCQM4M/utils/util.py b/‎examples/kddcup2021/PCQM4M/ogbg_lsc/utils/util.py renamed to ‎examples/kddcup2021/PCQM4M/utils/util.py
@@ -6,7 +6,25 @@
 [DOC](https://pgl.readthedocs.io/en/latest/) | [Quick Start](https://pgl.readthedocs.io/en/latest/quick_start/instruction.html) | [中文](./README.zh.md)
 
 ## Breaking News !!
-PGL v2.1 20210202
+&#x1F525; &#x1F525; &#x1F525; **OGB-LSC KDD CUP 2021 winners announced!!**  (2021.06.17)
+
+
+Super excited to announce that our PGL team won <font color=Red>**TWO FIRST**</font> places and <font color=Red>**ONE SECOND**</font> place across the three tracks of OGB-LSC KDD CUP 2021.
+Leaderboards can be found [here](https://ogb.stanford.edu/kddcup2021/results/).
+
+- **First place in MAG240M-LSC track**: Code and Technical Report can be found [here](./examples/kddcup2021/MAG240M/r_unimp).
+
+- **First place in WikiKG90M-LSC track**: Code and Technical Report can be found [here](./examples/kddcup2021/WikiKG90M).
+
+- **Second place in PCQM4M-LSC track**: Code and Technical Report can be found [here](./examples/kddcup2021/PCQM4M).
+
+**Two amazing papers using PGL have been accepted:** (2021.06.17)
+
+- Masked Label Prediction: Unified Message Passing Model for Semi-Supervised Classification, to appear in **IJCAI2021**.
+- HGAMN: Heterogeneous Graph Attention Matching Network for Multilingual POI Retrieval at Baidu Maps, to appear in **KDD2021**.
+
+
+PGL v2.1 2021.02.02
 
 - We are now support dygraph version of PaddlePaddle 2.0, and release PGL v2.1.
 - You can find the stable staic version of PGL in the branch "static_stable"
 
@@ -7,6 +7,22 @@
 
 ## 最新消息
 
+&#x1F525; &#x1F525; &#x1F525; **OGB-LSC KDD CUP 2021 冠军宣布!!**  (2021.06.17)
+
+非常兴奋地宣布，我们PGL团队在 OGB-LSC KDD CUP 2021 的三个赛道中获得了<font color=Red>**两个冠军**</font>和<font color=Red>**一个亚军**</font>。官方排行榜可以参考[这里](https://ogb.stanford.edu/kddcup2021/results/).
+
+
+- **MAG240M-LSC 赛道第一名**: 代码和技术报告可以参考[这里](./examples/kddcup2021/MAG240M/r_unimp).
+
+- **WikiKG90M-LSC 赛道第一名**: 代码和技术报告可以参考[这里](./examples/kddcup2021/WikiKG90M).
+
+- **PCQM4M-LSC 赛道第二名**: 代码和技术报告可以参考[这里](./examples/kddcup2021/PCQM4M).
+
+**两篇使用PGL的论文被接收！！** (2021.06.17)
+
+- Masked Label Prediction: Unified Message Passing Model for Semi-Supervised Classification, to appear in **IJCAI2021**.
+- HGAMN: Heterogeneous Graph Attention Matching Network for Multilingual POI Retrieval at Baidu Maps, to appear in **KDD2021**.
+
 PGL v2.1 2020.02.02
 
 - 更易用了！正式发布PGL 2.1版本，正式支持PaddlePaddle 2.0
 
@@ -13,14 +13,17 @@ pgl>=2.1.4
 ```
 
 ## Data preparation
-Under the root directory, please run following command to downlaod the pretrain and pcqm4m datasets.
+Under the root directory, please run the following commands to download the original pcqm4m dataset,
+DFT results for auxiliary tasks, and cross-validation split indexes.
+
 ```
 mkdir dataset && cd dataset
 wget http://ogb-data.stanford.edu/data/lsc/pcqm4m_kddcup2021.zip
 unzip pcqm4m_kddcup2021.zip
 wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/datasets/PCQM_pretrain/sdf.tar.gz
 mv sdf.tar.gz pcqm_pyscf_sdf.tar.gz
 tar -xzvf pcqm_pyscf_sdf.tar.gz
+wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/datasets/PCQM_pretrain/cross_split.pkl
 cd ..
 ```
 
 
@@ -60,7 +60,7 @@ def evaluate(model, loader):
     return input_dict
 
 @paddle.no_grad()
-def infer(config):
+def infer(config, output_path):
     model = getattr(M, config.model_type)(config)
 
     log.info("infer model from %s" % config.infer_from)
@@ -70,74 +70,35 @@ def infer(config):
     ds = getattr(DS, config.dataset_type)(config)
 
     split_idx = ds.get_idx_split()
-    train_ds = DS.Subset(ds, split_idx['train'], mode='train')
-    valid_ds = DS.Subset(ds, split_idx['valid'], mode='valid')
     test_ds = DS.Subset(ds, split_idx['test'], mode='test')
-
-    log.info("Train exapmles: %s" % len(train_ds))
-    log.info("Valid exapmles: %s" % len(valid_ds))
     log.info("Test exapmles: %s" % len(test_ds))
 
-    train_loader = Dataloader(train_ds, batch_size=config.batch_size, shuffle=False,
-            num_workers=config.num_workers, collate_fn=DS.CollateFn(config),
-            drop_last=True)
-
-    valid_loader = Dataloader(valid_ds, batch_size=config.valid_batch_size, shuffle=False,
-            num_workers=1, collate_fn=DS.CollateFn(config))
-
     test_loader = Dataloader(test_ds, batch_size=config.valid_batch_size, shuffle=False,
             num_workers=1, collate_fn=DS.CollateFn(config))
 
-    try:
-        task_name = config.infer_from.split("/")[-2]
-    except:
-        task_name = "ogb_kdd"
-    log.info("task_name: %s" % task_name)
-
     ### automatic evaluator. takes dataset name as input
     evaluator = PCQM4MEvaluator()
 
-    # ---------------- valid ----------------------- #
-    #  log.info("validating ...")
-    #  pred_dict = evaluate(model, valid_loader)
-    #
-    #  log.info("valid MAE: %s" % evaluator.eval(pred_dict)["mae"])
-    #  valid_output_path = os.path.join(config.output_dir, task_name)
-    #  make_dir(valid_output_path)
-    #  valid_output_file = os.path.join(valid_output_path, "valid_mae.txt")
-    #
-    #  log.info("saving valid result to %s" % valid_output_file)
-    #  with open(valid_output_file, 'w') as f:
-    #      for y_pred, idx in zip(pred_dict['y_pred'], split_idx['valid']):
-    #          smiles, label = ds.raw_dataset[idx]
-    #          f.write("%s\t%s\t%s\n" % (y_pred, label, smiles))
-    #
     # ---------------- test ----------------------- #
-
     log.info("testing ...")
     pred_dict = evaluate(model, test_loader)
 
-    test_output_path = os.path.join(config.output_dir, task_name)
+    test_output_path = os.path.join(config.output_dir, config.task_name)
     make_dir(test_output_path)
-    test_output_file = os.path.join(test_output_path, "test_mae.txt")
+    test_output_file = os.path.join(test_output_path, "test_pred.npz")
 
     log.info("saving test result to %s" % test_output_file)
-    with open(test_output_file, 'w') as f:
-        for y_pred, idx in zip(pred_dict['y_pred'], split_idx['test']):
-            smiles, label = ds.raw_dataset[idx]
-            f.write("%s\t%s\n" % (y_pred, smiles))
-
-    log.info("saving submition format to %s" % test_output_path)
-    evaluator.save_test_submission({'y_pred': pred_dict['y_pred']}, test_output_path)
+    np.savez_compressed(test_output_file, pred_dict['y_pred'].astype(np.float32))
 
 if __name__=="__main__":
     parser = argparse.ArgumentParser(description='gnn')
     parser.add_argument("--config", type=str, default="./config.yaml")
     parser.add_argument("--task_name", type=str, default="task_name")
     parser.add_argument("--mode", type=str, default="train")
-    parser.add_argument("--log_id", type=str, default=None)
+    parser.add_argument("--output_path", type=str, default="./")
     args = parser.parse_args()
 
     config = prepare_config(args.config, isCreate=False, isSave=False)
-    infer(config)
+    make_dir(args.output_path)
+    infer(config, args.output_path)