Skip to content

Commit 0203099

Browse files
authored
Merge pull request #270 from PaddlePaddle/liwb
update
2 parents e46732f + 82123b9 commit 0203099

27 files changed

+46
-48
lines changed

README.md

+19-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,25 @@
66
[DOC](https://pgl.readthedocs.io/en/latest/) | [Quick Start](https://pgl.readthedocs.io/en/latest/quick_start/instruction.html) | [中文](./README.zh.md)
77

88
## Breaking News !!
9-
PGL v2.1 20210202
9+
🔥 🔥 🔥 **OGB-LSC KDD CUP 2021 winners announced!!** (2021.06.17)
10+
11+
12+
We are super excited to announce that our PGL team won <font color=Red>**TWO FIRST**</font> places and <font color=Red>**ONE SECOND**</font> place out of a total of three tracks in OGB-LSC KDD CUP 2021.
13+
Leaderboards can be found [here](https://ogb.stanford.edu/kddcup2021/results/).
14+
15+
- **First place in MAG240M-LSC track**: Code and Technical Report can be found [here](./examples/kddcup2021/MAG240M/r_unimp).
16+
17+
- **First place in WikiKG90M-LSC track**: Code and Technical Report can be found [here](./examples/kddcup2021/WikiKG90M).
18+
19+
- **Second place in PCQM4M-LSC track**: Code and Technical Report can be found [here](./examples/kddcup2021/PCQM4M).
20+
21+
**Two amazing papers using PGL have been accepted:** (2021.06.17)
22+
23+
- Masked Label Prediction: Unified Message Passing Model for Semi-Supervised Classification, to appear in **IJCAI2021**.
24+
- HGAMN: Heterogeneous Graph Attention Matching Network for Multilingual POI Retrieval at Baidu Maps, to appear in **KDD2021**.
25+
26+
27+
PGL v2.1 2021.02.02
1028

1129
- We now support the dygraph version of PaddlePaddle 2.0, and have released PGL v2.1.
1230
- You can find the stable static version of PGL in the branch "static_stable".

README.zh.md

+16
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,22 @@
77

88
## 最新消息
99

10+
&#x1F525; &#x1F525; &#x1F525; **OGB-LSC KDD CUP 2021 冠军宣布!!** (2021.06.17)
11+
12+
非常兴奋地宣布,我们PGL团队在 OGB-LSC KDD CUP 2021 的三个赛道中获得了<font color=Red>**两个冠军**</font>和<font color=Red>**一个亚军**</font>。官方排行榜可以参考[这里](https://ogb.stanford.edu/kddcup2021/results/).
13+
14+
15+
- **MAG240M-LSC 赛道第一名**: 代码和技术报告可以参考[这里](./examples/kddcup2021/MAG240M/r_unimp).
16+
17+
- **WikiKG90M-LSC 赛道第一名**: 代码和技术报告可以参考[这里](./examples/kddcup2021/WikiKG90M).
18+
19+
- **PCQM4M-LSC 赛道第二名**: 代码和技术报告可以参考[这里](./examples/kddcup2021/PCQM4M).
20+
21+
**两篇使用PGL的论文被接收!!** (2021.06.17)
22+
23+
- Masked Label Prediction: Unified Message Passing Model for Semi-Supervised Classification, to appear in **IJCAI2021**.
24+
- HGAMN: Heterogeneous Graph Attention Matching Network for Multilingual POI Retrieval at Baidu Maps, to appear in **KDD2021**.
25+
1026
PGL v2.1 2021.02.02
1127

1228
- 更易用了!正式发布PGL 2.1版本,正式支持PaddlePaddle 2.0

examples/kddcup2021/PCQM4M/ogbg_lsc/README.md renamed to examples/kddcup2021/PCQM4M/README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,17 @@ pgl>=2.1.4
1313
```
1414

1515
## Data preparation
16-
Under the root directory, please run following command to downlaod the pretrain and pcqm4m datasets.
16+
Under the root directory, please run the following commands to download the original pcqm4m dataset,
17+
DFT results for auxiliary tasks, and cross-validation split indexes.
18+
1719
```
1820
mkdir dataset && cd dataset
1921
wget http://ogb-data.stanford.edu/data/lsc/pcqm4m_kddcup2021.zip
2022
unzip pcqm4m_kddcup2021.zip
2123
wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/datasets/PCQM_pretrain/sdf.tar.gz
2224
mv sdf.tar.gz pcqm_pyscf_sdf.tar.gz
2325
tar -xzvf pcqm_pyscf_sdf.tar.gz
26+
wget https://baidu-nlp.bj.bcebos.com/PaddleHelix/datasets/PCQM_pretrain/cross_split.pkl
2427
cd ..
2528
```
2629

examples/kddcup2021/PCQM4M/ogbg_lsc/src/test.py renamed to examples/kddcup2021/PCQM4M/src/test.py

+7-46
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def evaluate(model, loader):
6060
return input_dict
6161

6262
@paddle.no_grad()
63-
def infer(config):
63+
def infer(config, output_path):
6464
model = getattr(M, config.model_type)(config)
6565

6666
log.info("infer model from %s" % config.infer_from)
@@ -70,74 +70,35 @@ def infer(config):
7070
ds = getattr(DS, config.dataset_type)(config)
7171

7272
split_idx = ds.get_idx_split()
73-
train_ds = DS.Subset(ds, split_idx['train'], mode='train')
74-
valid_ds = DS.Subset(ds, split_idx['valid'], mode='valid')
7573
test_ds = DS.Subset(ds, split_idx['test'], mode='test')
76-
77-
log.info("Train exapmles: %s" % len(train_ds))
78-
log.info("Valid exapmles: %s" % len(valid_ds))
7974
log.info("Test exapmles: %s" % len(test_ds))
8075

81-
train_loader = Dataloader(train_ds, batch_size=config.batch_size, shuffle=False,
82-
num_workers=config.num_workers, collate_fn=DS.CollateFn(config),
83-
drop_last=True)
84-
85-
valid_loader = Dataloader(valid_ds, batch_size=config.valid_batch_size, shuffle=False,
86-
num_workers=1, collate_fn=DS.CollateFn(config))
87-
8876
test_loader = Dataloader(test_ds, batch_size=config.valid_batch_size, shuffle=False,
8977
num_workers=1, collate_fn=DS.CollateFn(config))
9078

91-
try:
92-
task_name = config.infer_from.split("/")[-2]
93-
except:
94-
task_name = "ogb_kdd"
95-
log.info("task_name: %s" % task_name)
96-
9779
### automatic evaluator. takes dataset name as input
9880
evaluator = PCQM4MEvaluator()
9981

100-
# ---------------- valid ----------------------- #
101-
# log.info("validating ...")
102-
# pred_dict = evaluate(model, valid_loader)
103-
#
104-
# log.info("valid MAE: %s" % evaluator.eval(pred_dict)["mae"])
105-
# valid_output_path = os.path.join(config.output_dir, task_name)
106-
# make_dir(valid_output_path)
107-
# valid_output_file = os.path.join(valid_output_path, "valid_mae.txt")
108-
#
109-
# log.info("saving valid result to %s" % valid_output_file)
110-
# with open(valid_output_file, 'w') as f:
111-
# for y_pred, idx in zip(pred_dict['y_pred'], split_idx['valid']):
112-
# smiles, label = ds.raw_dataset[idx]
113-
# f.write("%s\t%s\t%s\n" % (y_pred, label, smiles))
114-
#
11582
# ---------------- test ----------------------- #
116-
11783
log.info("testing ...")
11884
pred_dict = evaluate(model, test_loader)
11985

120-
test_output_path = os.path.join(config.output_dir, task_name)
86+
test_output_path = os.path.join(config.output_dir, config.task_name)
12187
make_dir(test_output_path)
122-
test_output_file = os.path.join(test_output_path, "test_mae.txt")
88+
test_output_file = os.path.join(test_output_path, "test_pred.npz")
12389

12490
log.info("saving test result to %s" % test_output_file)
125-
with open(test_output_file, 'w') as f:
126-
for y_pred, idx in zip(pred_dict['y_pred'], split_idx['test']):
127-
smiles, label = ds.raw_dataset[idx]
128-
f.write("%s\t%s\n" % (y_pred, smiles))
129-
130-
log.info("saving submition format to %s" % test_output_path)
131-
evaluator.save_test_submission({'y_pred': pred_dict['y_pred']}, test_output_path)
91+
np.savez_compressed(test_output_file, pred_dict['y_pred'].astype(np.float32))
13292

13393
if __name__=="__main__":
13494
parser = argparse.ArgumentParser(description='gnn')
13595
parser.add_argument("--config", type=str, default="./config.yaml")
13696
parser.add_argument("--task_name", type=str, default="task_name")
13797
parser.add_argument("--mode", type=str, default="train")
138-
parser.add_argument("--log_id", type=str, default=None)
98+
parser.add_argument("--output_path", type=str, default="./")
13999
args = parser.parse_args()
140100

141101
config = prepare_config(args.config, isCreate=False, isSave=False)
142-
infer(config)
102+
make_dir(args.output_path)
103+
infer(config, args.output_path)
143104

0 commit comments

Comments
 (0)