Created
March 22, 2021 12:48
-
-
Save PatWalters/ca41289a6990ebf7af1e5c44e188fccd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import dask.dataframe as dd | |
import pandas as pd | |
from rdkit import Chem | |
from rdkit.Chem.Descriptors import MolWt | |
from rdkit.Chem.rdMolDescriptors import BCUT2D | |
import time | |
# -- molecular weight functions
def calc_mw(smi):
    """Return the molecular weight for a SMILES string.

    Args:
        smi: SMILES string describing the molecule.

    Returns:
        The value computed by RDKit's ``MolWt`` for the parsed molecule, or
        NaN when ``smi`` is not a string or cannot be parsed into a molecule.
    """
    # Non-string cells (e.g. missing values produced when reading the input
    # file) cannot be handed to the SMILES parser.
    if not isinstance(smi, str):
        return float("nan")
    mol = Chem.MolFromSmiles(smi)
    # RDKit signals an unparsable SMILES by returning None rather than
    # raising; passing None on to MolWt would abort the whole partition.
    if mol is None:
        return float("nan")
    return MolWt(mol)
def mw_df(df):
    """Apply ``calc_mw`` to the ``SMILES`` column of a dataframe.

    Args:
        df: Dataframe (or dataframe partition) with a ``SMILES`` column.

    Returns:
        A series holding one ``calc_mw`` result per row of ``df``.
    """
    smiles_column = df["SMILES"]
    return smiles_column.apply(calc_mw)
# -- bcut functions
def bcut_df(df):
    """Apply ``calc_bcut`` to the ``SMILES`` column of a dataframe.

    Args:
        df: Dataframe (or dataframe partition) with a ``SMILES`` column.

    Returns:
        A series holding one ``calc_bcut`` result per row of ``df``.
    """
    smiles_column = df["SMILES"]
    return smiles_column.apply(calc_bcut)
def calc_bcut(smi):
    """Return the RDKit BCUT2D descriptors for a SMILES string.

    Args:
        smi: SMILES string describing the molecule.

    Returns:
        A plain Python list with the values produced by ``BCUT2D`` for the
        parsed molecule, or ``None`` when ``smi`` is not a string or cannot
        be parsed into a molecule.
    """
    # Non-string cells (e.g. missing values produced when reading the input
    # file) cannot be handed to the SMILES parser.
    if not isinstance(smi, str):
        return None
    mol = Chem.MolFromSmiles(smi)
    # RDKit signals an unparsable SMILES by returning None rather than
    # raising; passing None on to BCUT2D would abort the whole partition.
    if mol is None:
        return None
    # Copy into a built-in list so the result can be pickled when it is sent
    # back from a worker process, whatever container type RDKit returns.
    # NOTE(review): presumably the RDKit return type is what broke the
    # scheduler='processes' run -- confirm against the installed RDKit version.
    return list(BCUT2D(mol))
def main():
    """Time the MW and BCUT descriptor calculations over a SMILES file with Dask.

    Reads the space-separated file named by the first command-line argument
    into ``SMILES`` and ``Name`` columns, splits it into 16 Dask partitions,
    computes the ``MW`` and ``BCUT`` columns with the multiprocessing
    scheduler, then prints the elapsed wall-clock seconds and the head of the
    resulting dataframe.

    Raises:
        SystemExit: If no input file was given on the command line.
    """
    # Fail with a usage message instead of a bare IndexError on sys.argv[1].
    if len(sys.argv) < 2:
        raise SystemExit(f"usage: {sys.argv[0]} <smiles_file>")

    start = time.time()
    df = pd.read_csv(sys.argv[1], sep=" ", names=["SMILES", "Name"])
    ddf = dd.from_pandas(df, npartitions=16)
    ddf['MW'] = ddf.map_partitions(mw_df, meta='float').compute(scheduler='processes')
    # NOTE: per the original author, the BCUT step worked when computed with
    # the default scheduler (plain ``.compute()``) but failed when run with
    # scheduler='processes' as below.
    ddf['BCUT'] = ddf.map_partitions(bcut_df, meta='float').compute(scheduler='processes')
    print(time.time() - start)
    print(ddf.head())
# Run the benchmark only when executed as a script, not when imported.
# NOTE(review): presumably this guard also matters for scheduler='processes',
# whose workers may re-import this module -- confirm.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment