promptbind.utils package

Submodules

promptbind.utils.fabind_inference_dataset module

class promptbind.utils.fabind_inference_dataset.InferenceDataset(index_csv, pdb_file_dir, preprocess_dir)

Bases: Dataset

get(idx)

Gets the data object at index idx.

len()

Returns the number of data objects stored in the dataset.

promptbind.utils.feature_utils module

promptbind.utils.feature_utils.Seed_everything(seed=42)
promptbind.utils.feature_utils.binarize(x)
promptbind.utils.feature_utils.extract_torchdrug_feature_from_mol(mol, has_LAS_mask=False)
promptbind.utils.feature_utils.extract_torchdrug_feature_from_mol_E3Bind(mol, has_LAS_mask=False)
promptbind.utils.feature_utils.generate_and_write_sdf_from_smiles_using_rdkit_E3Bind(smiles, rdkitMolFile=None, shift_dis=30, fast_generation=False)
promptbind.utils.feature_utils.generate_conformation(mol)
promptbind.utils.feature_utils.generate_rdkit_conformation_v2(smiles, n_repeat=50)
promptbind.utils.feature_utils.generate_sdf_from_smiles_using_rdkit(smiles, rdkitMolFile, shift_dis=30, fast_generation=False)
promptbind.utils.feature_utils.generate_sdf_from_smiles_using_rdkit_E3Bind(smiles, shift_dis=30, fast_generation=False)
promptbind.utils.feature_utils.get_LAS_distance_constraint_mask(mol)
promptbind.utils.feature_utils.get_canonical_smiles(smiles)
promptbind.utils.feature_utils.get_clean_res_list(res_list, verbose=False, ensure_ca_exist=False, bfactor_cutoff=None)
promptbind.utils.feature_utils.get_compound_pair_dis_distribution(coords, LAS_distance_constraint_mask=None)
promptbind.utils.feature_utils.get_protein_feature(res_list)
promptbind.utils.feature_utils.get_res_unique_id(residue)
promptbind.utils.feature_utils.n_hops_adj(adj, n_hops)
promptbind.utils.feature_utils.read_mol(sdf_fileName, mol2_fileName, verbose=False)
promptbind.utils.feature_utils.remove_hetero_and_extract_ligand(res_list, verbose=False, ensure_ca_exist=False, bfactor_cutoff=None)
promptbind.utils.feature_utils.save_cleaned_protein(c, proteinFile)
promptbind.utils.feature_utils.select_chain_within_cutoff_to_ligand_v2(x)
promptbind.utils.feature_utils.split_protein_and_ligand(c, pdb, ligand_seq_id, proteinFile, ligandFile)
promptbind.utils.feature_utils.write_renumbered_sdf(toFile, sdf_fileName, mol2_fileName)
promptbind.utils.feature_utils.write_with_new_coords(mol, new_coords, toFile)

promptbind.utils.generation_utils module

promptbind.utils.generation_utils.binarize(x)
promptbind.utils.generation_utils.compute_RMSD(a, b)
promptbind.utils.generation_utils.distance_loss_function(epoch, y_pred, x, protein_nodes_xyz, compound_pair_dis_constraint, LAS_distance_constraint_mask=None, mode=0)
promptbind.utils.generation_utils.distance_optimize_compound_coords(coords, y_pred, protein_nodes_xyz, compound_pair_dis_constraint, total_epoch=5000, loss_function=<function distance_loss_function>, LAS_distance_constraint_mask=None, mode=0, show_progress=False)
promptbind.utils.generation_utils.get_LAS_distance_constraint_mask(mol)
promptbind.utils.generation_utils.get_info_pred_distance(coords, y_pred, protein_nodes_xyz, compound_pair_dis_constraint, n_repeat=1, LAS_distance_constraint_mask=None, mode=0, show_progress=False)
promptbind.utils.generation_utils.n_hops_adj(adj, n_hops)
promptbind.utils.generation_utils.read_mol(sdf_fileName, mol2_fileName, verbose=False)
promptbind.utils.generation_utils.write_with_new_coords(mol, new_coords, toFile)

promptbind.utils.inference_mol_utils module

promptbind.utils.inference_mol_utils.binarize(x)
promptbind.utils.inference_mol_utils.extract_torchdrug_feature_from_mol(mol, has_LAS_mask=False)
promptbind.utils.inference_mol_utils.generate_conformation(mol)
promptbind.utils.inference_mol_utils.get_LAS_distance_constraint_mask(mol)
promptbind.utils.inference_mol_utils.get_compound_pair_dis_distribution(coords, LAS_distance_constraint_mask=None)
promptbind.utils.inference_mol_utils.n_hops_adj(adj, n_hops)
promptbind.utils.inference_mol_utils.read_mol_and_renumber(sdf_fileName, mol2_fileName, verbose=False)
promptbind.utils.inference_mol_utils.read_smiles(smile)
promptbind.utils.inference_mol_utils.write_mol(reference_mol, coords, output_file)

promptbind.utils.inference_pdb_utils module

promptbind.utils.inference_pdb_utils.extract_esm_feature(protein)
promptbind.utils.inference_pdb_utils.extract_protein_structure(path)
promptbind.utils.inference_pdb_utils.get_clean_res_list(res_list, verbose=False, ensure_ca_exist=False, bfactor_cutoff=None)
promptbind.utils.inference_pdb_utils.get_protein_structure(res_list)

promptbind.utils.logging_utils module

class promptbind.utils.logging_utils.Logger(accelerator, log_path)

Bases: object

log_message(msg)
log_stats(stats, epoch, args, prefix='')

promptbind.utils.metrics module

promptbind.utils.metrics.affinity_metrics(affinity_pred, affinity)
promptbind.utils.metrics.cls_metric(y_pred, y, threshold=0.5)
promptbind.utils.metrics.compute_individual_metrics(pdb_list, inputFile_list, y_list)
promptbind.utils.metrics.myMetric(y_pred, y, threshold=0.5)
promptbind.utils.metrics.pocket_direct_metrics(pocket_coord_pred, pocket_coord)
promptbind.utils.metrics.pocket_metrics(pocket_coord_pred, pocket_coord)
promptbind.utils.metrics.print_metrics(metrics)

promptbind.utils.metrics_to_tsb module

promptbind.utils.metrics_to_tsb.metrics_runtime_no_prefix(metrics, writer, epoch)

promptbind.utils.post_optim_utils module

promptbind.utils.post_optim_utils.compute_RMSD(a, b)
promptbind.utils.post_optim_utils.post_optimize_compound_coords(reference_compound_coords, predict_compound_coords, lr=0.1, total_epoch=1000, LAS_edge_index=None, mode=0)
promptbind.utils.post_optim_utils.post_optimize_compound_coords_lbfgs(reference_compound_coords, predict_compound_coords, lr=0.01, total_iter=100, total_epoch=10, LAS_edge_index=None, mode=0)
promptbind.utils.post_optim_utils.post_optimize_loss_function(epoch, x, predict_compound_coords, compound_pair_dis_constraint, LAS_distance_constraint_mask=None, mode=0)
promptbind.utils.post_optim_utils.read_molecule(molecule_file, sanitize=False, calc_charges=False, remove_hs=False)

Load a molecule from a file of format .mol2, .sdf, .pdbqt or .pdb.

Parameters:
  • molecule_file – Path to the file storing the molecule, which can be of format .mol2, .sdf, .pdbqt or .pdb.

  • sanitize (bool) – Whether sanitization is performed in initializing RDKit molecule instances. See https://www.rdkit.org/docs/RDKit_Book.html for details of the sanitization. Default to False.

  • calc_charges (bool) – Whether to add Gasteiger charges via RDKit. Setting this to be True will enforce sanitize to be True. Default to False.

  • remove_hs (bool) – Whether to remove hydrogens via RDKit. Note that removing hydrogens can be quite slow for large molecules. Default to False.

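  • use_conformation (bool) – Whether we need to extract molecular conformation from proteins and ligands. Default to True. Note: this parameter does not appear in the read_molecule signature listed for this function (molecule_file, sanitize, calc_charges, remove_hs), so this entry may be stale.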

Returns:

  • mol (rdkit.Chem.rdchem.Mol) – RDKit molecule instance for the loaded molecule.

  • coordinates (np.ndarray of shape (N, 3) or None) – The 3D coordinates of atoms in the molecule. N for the number of atoms in the molecule. None will be returned if use_conformation is False or we failed to get conformation information.

promptbind.utils.post_optim_utils.write_mol(reference_file, coords, output_file)

promptbind.utils.utils module

promptbind.utils.utils.SetDihedral(conf, atom_idx, new_vale)
promptbind.utils.utils.compute_dis_between_two_vector(a, b)
promptbind.utils.utils.compute_dis_between_two_vector_tensor(a, b)
promptbind.utils.utils.construct_data_from_graph_gvp_mean(args, protein_node_xyz, protein_seq, coords, compound_node_features, input_atom_edge_list, input_atom_edge_attr_list, LAS_edge_index, rdkit_coords, compound_coords_init_mode='pocket_center_rdkit', includeDisMap=True, pdb_id=None, group='train', seed=42, data_path=None, contactCutoff=8.0, pocket_radius=20, interactionThresholdDistance=10, compoundMode=1, add_noise_to_com=None, use_whole_protein=False, use_compound_com_as_pocket=True, chosen_pocket_com=None, random_rotation=False, pocket_idx_no_noise=True, protein_esm2_feat=None)
promptbind.utils.utils.evaluate_mean_pocket_cls_coord_multi_task(accelerator, args, data_loader, model, com_coord_criterion, criterion, pocket_cls_criterion, pocket_coord_criterion, relative_k, device, pred_dis=False, info=None, saveFileName=None, use_y_mask=False, skip_y_metrics_evaluation=False, stage=1)
promptbind.utils.utils.evaluate_mean_pocket_cls_coord_pocket_pred(args, data_loader, model, com_coord_criterion, criterion, pocket_cls_criterion, pocket_coord_criterion, relative_k, device, pred_dis=False, info=None, saveFileName=None, use_y_mask=False, skip_y_metrics_evaluation=False, stage=1)
promptbind.utils.utils.get_keepNode(com, protein_node_xyz, n_node, pocket_radius, use_whole_protein, use_compound_com_as_pocket, add_noise_to_com, chosen_pocket_com)
promptbind.utils.utils.get_keepNode_tensor(protein_node_xyz, pocket_radius, add_noise_to_com, chosen_pocket_com)
promptbind.utils.utils.get_protein_edge_features_and_index(protein_edge_index, protein_edge_s, protein_edge_v, keepNode)
promptbind.utils.utils.get_torsions(m)
promptbind.utils.utils.gumbel_softmax_no_random(logits: Tensor, tau: float = 1, hard: bool = False, eps: float = 1e-10, dim: int = -1) → Tensor
promptbind.utils.utils.post_optim_mol(args, accelerator, data, com_coord_pred, compound_batch, LAS_tmp, rigid=False)
promptbind.utils.utils.read_mol(sdf_fileName, mol2_fileName, verbose=False)
promptbind.utils.utils.read_pdbbind_data(fileName)
promptbind.utils.utils.uniform_random_rotation(x)

Apply a random rotation in 3D, with a distribution uniform over the sphere.

Parameters:
  • x – Vector or set of vectors with dimension (n, 3), where n is the number of vectors.

Returns:

Array of shape (n, 3) containing the randomly rotated vectors of x, about the mean coordinate of x.

Algorithm taken from “Fast Random Rotation Matrices” (James Arvo, 1992): https://doi.org/10.1016/B978-0-08-050755-2.50034-8

Module contents