@@ -39,7 +39,7 @@ def read_csv(
3939 df: a `pandas.DataFrame`
4040 """
4141
42- df = pd .read_csv (urlpath , ** kwargs )
42+ df : pd . DataFrame = pd .read_csv (urlpath , ** kwargs ) # type: ignore
4343
4444 if smiles_column is not None :
4545 PandasTools .AddMoleculeColumnToFrame (df , smiles_column , mol_column )
@@ -78,48 +78,60 @@ def read_excel(
7878
7979def read_sdf (
8080 urlpath : Union [str , os .PathLike , TextIO ],
81+ sanitize : bool = True ,
8182 as_df : bool = False ,
8283 smiles_column : Optional [str ] = "smiles" ,
8384 mol_column : str = None ,
8485 include_private : bool = False ,
8586 include_computed : bool = False ,
86- sanitize : bool = True ,
8787 strict_parsing : bool = True ,
8888) -> Union [List [Chem .rdchem .Mol ], pd .DataFrame ]:
8989 """Read an SDF file.
9090
91+ Note: This function is meant to be used with datasets that fit _in-memory_.
92+ For more advanced usage, we suggest using `Chem.ForwardSDMolSupplier` directly.
93+
9194 Args:
9295 urlpath: Path to a file or a file-like object. Path can be remote or local.
96+ sanitize: Whether to sanitize the molecules.
9397 as_df: Whether to return a list of mols or a pandas DataFrame.
9498 smiles_column: Name of the SMILES column. Only relevant if `as_df` is True.
9599 mol_column: Name of the mol column. Only relevant if `as_df` is True.
96100 include_private: Include private properties in the columns. Only relevant if
97101 `as_df` is True.
98102 include_computed: Include computed properties in the columns. Only relevant if
99103 `as_df` is True.
100- sanitize: Whether to sanitize the molecules
101104 strict_parsing: If set to false, the parser is more lax about correctness of the contents.
102105 """
103106
104107 # File-like object
105108 if isinstance (urlpath , io .IOBase ):
106- supplier = Chem .ForwardSDMolSupplier (urlpath , sanitize = False , strictParsing = strict_parsing )
107- mols = [mol for mol in supplier if mol is not None ]
109+ supplier = Chem .ForwardSDMolSupplier (
110+ urlpath ,
111+ sanitize = sanitize ,
112+ strictParsing = strict_parsing ,
113+ )
114+ mols = list (supplier )
108115
109116 # Regular local or remote paths
110117 else :
111118 with fsspec .open (urlpath ) as f :
119+
120+ # Handle gzip file if needed
112121 if str (urlpath ).endswith (".gz" ) or str (urlpath ).endswith (".gzip" ):
113122 f = gzip .open (f )
114- supplier = Chem .ForwardSDMolSupplier (f , sanitize = False , strictParsing = strict_parsing )
115- mols = [mol for mol in supplier if mol is not None ]
116123
117- if sanitize == True :
118- mols_props = [
119- (dm .sanitize_mol (mol ), mol .GetPropsAsDict ()) for mol in mols if mol is not None
120- ]
121- mols = [dm .set_mol_props (mol , props ) for mol , props in mols_props ]
124+ supplier = Chem .ForwardSDMolSupplier (
125+ f ,
126+ sanitize = sanitize ,
127+ strictParsing = strict_parsing ,
128+ )
129+ mols = list (supplier )
122130
131+ # Discard None values
132+ mols = [mol for mol in mols if mol is not None ]
133+
134+ # Convert to dataframe
123135 if as_df :
124136 return dm .to_df (
125137 mols ,
@@ -133,15 +145,15 @@ def read_sdf(
133145
134146
135147def to_sdf (
136- mols : Union [Sequence [Chem .rdchem .Mol ], pd .DataFrame ],
148+ mols : Union [Chem . rdchem . Mol , Sequence [Chem .rdchem .Mol ], pd .DataFrame ],
137149 urlpath : Union [str , os .PathLike , TextIO ],
138150 smiles_column : Optional [str ] = "smiles" ,
139151 mol_column : str = None ,
140152):
141153 """Write molecules to a file.
142154
143155 Args:
144- mols: a dataframe or a list of molecule.
156+ mols: a dataframe, a molecule or a list of molecules.
145157 urlpath: Path to a file or a file-like object. Path can be remote or local.
146158 smiles_column: Column name to extract the molecule.
147159 mol_column: Column name to extract the molecule. It takes
@@ -151,6 +163,9 @@ def to_sdf(
151163 if isinstance (mols , pd .DataFrame ):
152164 mols = dm .from_df (mols , smiles_column = smiles_column , mol_column = mol_column )
153165
166+ elif isinstance (mols , Chem .rdchem .Mol ):
167+ mols = [mols ]
168+
154169 # Filter out None values
155170 mols = [mol for mol in mols if mol is not None ]
156171
0 commit comments