Merge pull request #29 from PaddlePaddle/pretrain_gnn

LihangLiu · web-flow · commit c9538c0b29a8 · 2020-12-17T13:12:04.000+08:00
add unittest
diff --git a/apps/pretrained_compound/pretrain_gnns/README.md b/apps/pretrained_compound/pretrain_gnns/README.md
diff --git a/apps/pretrained_compound/pretrain_gnns/README_cn.md b/apps/pretrained_compound/pretrain_gnns/README_cn.md
diff --git a/pahelix/tests/import_test.py b/pahelix/tests/import_test.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python
+#-*-coding:utf-8-*-
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Smoke test: the top-level ``pahelix`` package can be imported."""
+import unittest
+
+
+class ImportTest(unittest.TestCase):
+    """Checks that the bare ``pahelix`` package imports cleanly."""
+
+    def test_import_pahelix_alone(self):
+        """Import only the top-level package and verify the module name."""
+        # The import lives inside the test so that a failure is reported
+        # as an error of this test case.
+        import pahelix
+
+        self.assertEqual(pahelix.__name__, 'pahelix')
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pahelix/utils/tests/compound_tools_test.py b/pahelix/utils/tests/compound_tools_test.py
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+#-*-coding:utf-8-*-
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for the helpers in pahelix.utils.compound_tools."""
+import sys
+import unittest
+from rdkit import Chem
+from rdkit.Chem import AllChem
+
+from pahelix.utils.compound_tools import smiles_to_graph_data
+from pahelix.utils.compound_tools import mol_to_graph_data
+from pahelix.utils.compound_tools import get_gasteiger_partial_charges
+from pahelix.utils.compound_tools import create_standardized_mol_id
+from pahelix.utils.compound_tools import split_rdkit_mol_obj
+from pahelix.utils.compound_tools import CompoundConstants
+
+
+class CompoundToolsTest(unittest.TestCase):
+    """Smoke tests that run the compound helpers on one fixed molecule."""
+
+    # The single molecule shared by every test; the SMILES string spells
+    # out 16 heavy atoms.
+    SMILES = 'CCOc1ccc2nc(S(N)(=O)=O)sc2c1'
+
+    def test_mol_to_graph_data(self):
+        """An RDKit molecule converts to a truthy (non-empty) result."""
+        mol = AllChem.MolFromSmiles(self.SMILES)
+        data = mol_to_graph_data(mol)
+        self.assertTrue(data)
+
+    def test_smiles_to_graph_data(self):
+        """A SMILES string converts to a truthy (non-empty) result."""
+        data = smiles_to_graph_data(self.SMILES)
+        self.assertTrue(data)
+
+    def test_get_gasteiger_partial_charges(self):
+        """The returned charges have 16 entries for the test molecule."""
+        mol = AllChem.MolFromSmiles(self.SMILES)
+        charges = get_gasteiger_partial_charges(mol)
+        self.assertEqual(len(charges), 16)
+
+    def test_create_standardized_mol_id(self):
+        """Two calls with the same SMILES return equal ids."""
+        first_id = create_standardized_mol_id(self.SMILES)
+        second_id = create_standardized_mol_id(self.SMILES)
+        self.assertEqual(first_id, second_id)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pahelix/utils/tests/data_utils_test.py b/pahelix/utils/tests/data_utils_test.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+#-*-coding:utf-8-*-
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for the npz save/load helpers in pahelix.utils.data_utils."""
+import os
+import shutil
+import sys
+import tempfile
+import unittest
+
+import numpy as np
+
+from pahelix.utils.data_utils import load_npz_to_data_list
+from pahelix.utils.data_utils import save_data_list_to_npz
+
+
+class DataUtilsTest(unittest.TestCase):
+    """Round-trip test for saving a data list to npz and loading it back."""
+
+    def test_data_list_to_npz(self):
+        """A saved data list reloads with the same length, keys and values."""
+        data_list = [
+            {"a": np.array([1, 23, 4])},
+            {"a": np.array([2, 34, 5])},
+        ]
+        # Work in a private temporary directory that is removed afterwards,
+        # instead of leaving a stray 'tmp.npz' in the current directory.
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
+        npz_file = os.path.join(tmp_dir, 'tmp.npz')
+
+        save_data_list_to_npz(data_list, npz_file)
+        reload_data_list = load_npz_to_data_list(npz_file)
+
+        self.assertEqual(len(data_list), len(reload_data_list))
+        for d1, d2 in zip(data_list, reload_data_list):
+            self.assertEqual(len(d1), len(d2))
+            for key in d1:
+                np.testing.assert_array_equal(d1[key], d2[key])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pahelix/utils/tests/splitters_test.py b/pahelix/utils/tests/splitters_test.py
@@ -0,0 +1,80 @@
+#!/usr/bin/python
+#-*-coding:utf-8-*-
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Unit tests for the dataset splitters in pahelix.utils.splitters."""
+import sys
+import numpy as np
+import unittest
+
+from pahelix.utils.splitters import \
+        RandomSplitter, IndexSplitter, ScaffoldSplitter, RandomScaffoldSplitter
+from pahelix.datasets.inmemory_dataset import InMemoryDataset
+from pahelix.featurizers.featurizer import Featurizer
+
+
+# SMILES strings shared by every splitter test: three distinct molecules
+# repeated 4, 5 and 4 times, 13 examples in total.
+_SMILES_LIST = (
+    ['CCOc1ccc2nc(S(N)(=O)=O)sc2c1'] * 4
+    + ['CC(C)CCCCCCCOP(OCCCCCCCC(C)C)Oc1ccccc1'] * 5
+    + ['CCCCCCCCCCOCC(O)CN'] * 4
+)
+
+
+class _SplitterTestMixin(object):
+    """Shared ``test_split`` for every splitter test case.
+
+    Deliberately not a ``unittest.TestCase`` itself, so the test runner
+    only collects the concrete subclasses, each of which sets
+    ``splitter_class``.
+    """
+
+    # Splitter class under test; overridden by each concrete test case.
+    splitter_class = None
+
+    def test_split(self):
+        """The sizes of the three splits add up to the dataset size."""
+        # Fresh dicts for every test so the tests never share mutable state.
+        raw_data_list = [{'smiles': smiles} for smiles in _SMILES_LIST]
+        dataset = InMemoryDataset(raw_data_list)
+        splitter = self.splitter_class()
+        train_dataset, valid_dataset, test_dataset = splitter.split(
+                dataset, frac_train=0.34, frac_valid=0.33, frac_test=0.33)
+        n = len(train_dataset) + len(valid_dataset) + len(test_dataset)
+        self.assertEqual(n, len(dataset))
+
+
+class RandomSplitterTest(_SplitterTestMixin, unittest.TestCase):
+    """Runs the shared split check against ``RandomSplitter``."""
+    splitter_class = RandomSplitter
+
+
+class IndexSplitterTest(_SplitterTestMixin, unittest.TestCase):
+    """Runs the shared split check against ``IndexSplitter``."""
+    splitter_class = IndexSplitter
+
+
+class ScaffoldSplitterTest(_SplitterTestMixin, unittest.TestCase):
+    """Runs the shared split check against ``ScaffoldSplitter``."""
+    splitter_class = ScaffoldSplitter
+
+
+class RandomScaffoldSplitterTest(_SplitterTestMixin, unittest.TestCase):
+    """Runs the shared split check against ``RandomScaffoldSplitter``."""
+    splitter_class = RandomScaffoldSplitter
+
+
+if __name__ == '__main__':
+    unittest.main()