Source code for promptbind.data.inference_preprocess_mol_confs
# Import order is kept exactly as in the original file.
import torch
import argparse
import os
from promptbind.utils.inference_mol_utils import read_smiles, extract_torchdrug_feature_from_mol, generate_conformation
import pandas as pd
from multiprocessing import Pool


def _init_worker(shared_info, shared_args):
    """Publish the molecule table and CLI options as module globals in a worker.

    ``get_mol_info`` reads the module-level names ``info`` and ``args``. Those
    are only assigned under the ``__main__`` guard, so worker processes started
    with the ``spawn`` or ``forkserver`` methods would not have them. Running
    this as the pool initializer makes them available regardless of the start
    method.
    """
    global info, args
    info = shared_info
    args = shared_args


def get_mol_info(idx):
    """Process the molecule at row ``idx`` of ``info`` and save the result.

    The SMILES string of the row is passed through ``read_smiles``,
    ``generate_conformation`` and ``extract_torchdrug_feature_from_mol``
    (with ``has_LAS_mask=True``); the pair ``[mol, molecule_info]`` is then
    written with ``torch.save`` to ``<args.save_mols_dir>/mol_<idx>.pt``.

    Relies on the module-level globals ``info`` (a DataFrame with a ``smiles``
    column) and ``args`` (parsed CLI options with ``save_mols_dir``).

    Args:
        idx: Positional row index into ``info``.

    Returns:
        None. Any exception is caught and reported on stdout so that a single
        bad molecule does not abort the whole batch.
    """
    try:
        smiles = info.iloc[idx].smiles
        mol = read_smiles(smiles)
        mol = generate_conformation(mol)
        molecule_info = extract_torchdrug_feature_from_mol(mol, has_LAS_mask=True)
        torch.save([mol, molecule_info], os.path.join(args.save_mols_dir, f'mol_{idx}.pt'))
    except Exception as e:
        # Deliberate best-effort: report the failure and move on to the next molecule.
        print('Failed to read molecule id ', idx, ' We are skipping it. The reason is the exception: ', e)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Preprocess molecules.')
    parser.add_argument("--index_csv", type=str, default="../inference_examples/test.csv",
                        help="Specify the index path for molecules.")
    parser.add_argument("--save_mols_dir", type=str, default="../inference_examples/mol",
                        help="Specify where to save the processed pt.")
    parser.add_argument("--num_threads", type=int, default=10,
                        help="Multiprocessing threads number")
    args = parser.parse_args()

    # Create the output directory without going through a shell, so paths with
    # spaces or shell metacharacters are handled correctly. An empty path means
    # "current directory" for os.path.join, so there is nothing to create.
    if args.save_mols_dir:
        os.makedirs(args.save_mols_dir, exist_ok=True)

    rows = []
    with open(args.index_csv, 'r') as f:
        next(f, None)  # skip the header line
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank / trailing empty lines instead of failing the unpack.
                continue
            smiles, pdb = line.split(',')
            rows.append([smiles, pdb])
    info = pd.DataFrame(rows, columns=['smiles', 'pdb'])

    idx = list(range(len(info)))

    # The initializer hands ``info`` and ``args`` to every worker so that
    # ``get_mol_info`` works under any multiprocessing start method.
    with Pool(processes=args.num_threads, initializer=_init_worker, initargs=(info, args)) as p:
        _ = p.map(get_mol_info, idx)