diffalign.utils package

Subpackages

Submodules

diffalign.utils.chem module

diffalign.utils.chem.bond_type_to_int(bond)
diffalign.utils.chem.draw_mol_svg(mol, molSize=(450, 150), kekulize=False)
diffalign.utils.chem.get_2D_mol(mol)
diffalign.utils.chem.get_atom_symbol(atomic_number)
diffalign.utils.chem.get_atoms_in_ring(mol)
diffalign.utils.chem.get_best_rmsd(probe, ref)
diffalign.utils.chem.mol_to_graph_data_obj(mol)
diffalign.utils.chem.mol_to_smiles(mol: Mol) str
diffalign.utils.chem.mol_to_smiles_without_Hs(mol: Mol) str
diffalign.utils.chem.remove_duplicate_mols(molecules: List[Mol]) List[Mol]
diffalign.utils.chem.set_conformer_positions(conf, pos)
diffalign.utils.chem.set_rdmol_positions(rdkit_mol, pos)
Parameters:
  • rdkit_mol – An rdkit.Chem.rdchem.Mol object.

  • pos – (N_atoms, 3)

diffalign.utils.chem.set_rdmol_positions_(mol, pos)
Parameters:
  • mol – An rdkit.Chem.rdchem.Mol object.

  • pos – (N_atoms, 3)

diffalign.utils.chem.update_data_pos_from_rdmol(data)
diffalign.utils.chem.update_data_rdmol_positions(data)

diffalign.utils.common module

class diffalign.utils.common.ExponentialLR_with_minLr(optimizer, gamma, min_lr=0.0001, last_epoch=-1, verbose=False)

Bases: ExponentialLR

get_lr()
diffalign.utils.common.get_optimizer(cfg, model)
diffalign.utils.common.get_scheduler(cfg, optimizer)
diffalign.utils.common.repeat_batch(batch: Batch, num_repeat) Batch
diffalign.utils.common.repeat_data(data: Data, num_repeat) Batch

diffalign.utils.dataloader module

class diffalign.utils.dataloader.Collater(dataset: Dataset | Sequence[BaseData] | DatasetAdapter, follow_batch: List[str] | None = None, exclude_keys: List[str] | None = None)

Bases: object

class diffalign.utils.dataloader.DataLoader(dataset: Dataset | Sequence[BaseData] | DatasetAdapter, batch_size: int = 1, shuffle: bool = False, follow_batch: List[str] | None = None, exclude_keys: List[str] | None = None, **kwargs)

Bases: DataLoader

diffalign.utils.datasets module

class diffalign.utils.datasets.ConformationDataset(path, transform=None)

Bases: Dataset

get(idx)

Gets the data object at index idx.

len()

Returns the number of data objects stored in the dataset.

class diffalign.utils.datasets.MolClusterData(x: Tensor | None = None, edge_index: Tensor | None = None, edge_attr: Tensor | None = None, y: Tensor | int | float | None = None, pos: Tensor | None = None, time: Tensor | None = None, **kwargs)

Bases: Data

class diffalign.utils.datasets.PackedConformationDataset(path, transform=None)

Bases: ConformationDataset

class diffalign.utils.datasets.SidechainConformationDataset(path, transform=None, cutoff=10.0, max_residue=5000, fix_subgraph=False)

Bases: ConformationDataset

static collate_fn(data)
diffalign.utils.datasets.accumulate_grad_from_subgraph(model, atom_type, pos, bond_index, bond_type, batch, atom2res, batch_size=8, device='cuda:0', is_sidechain=None, is_alpha=None, pos_gt=None, cutoff=10.0, max_residue=5000, transform=None)
  1. decompose the protein into subgraphs

  2. evaluate subgraphs using trained models

  3. accumulate atom-wise grads

  4. return grads

diffalign.utils.datasets.get_test_set_with_large_num_conf(base_path, dataset_name, block, tot_mol_size=1000, seed=None, confmin=50, confmax=500)

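base_path: directory that contains the GEOM dataset

dataset_name: dataset name, should be in [qm9, drugs]

conf_per_mol: keep only molecules that have at least conf_per_mol conformers, and sample the most probable conf_per_mol conformers

train_size: training-set ratio; val = test = (1 - train_size) / 2

tot_mol_size: max number of molecules. The total number of final conformers should be tot_mol_size * conf_per_mol

seed: random seed for the RNG

Note: conf_per_mol and train_size do not appear in this function's signature; this description appears to be shared with preprocess_GEOM_dataset_with_fixed_num_conf and may not fully apply here.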

diffalign.utils.datasets.pdb_to_data(pdb_path, name)
diffalign.utils.datasets.prepare_pdb2(scn_dir, data_path)
diffalign.utils.datasets.prepare_pdb_valtest(scn_dir, data_path)
diffalign.utils.datasets.prepare_pdblarge(scn_dir, data_path)
diffalign.utils.datasets.preprocess_GEOM_dataset(base_path, dataset_name, max_conf=5, train_size=0.8, max_size=9999999999, seed=None)
diffalign.utils.datasets.preprocess_GEOM_dataset_with_fixed_num_conf(base_path, dataset_name, conf_per_mol=5, train_size=0.8, tot_mol_size=50000, seed=None)

base_path: directory that contains the GEOM dataset

dataset_name: dataset name, should be in [qm9, drugs]

conf_per_mol: keep only molecules that have at least conf_per_mol conformers, and sample the most probable conf_per_mol conformers

train_size: training-set ratio; val = test = (1 - train_size) / 2

tot_mol_size: max number of molecules. The total number of final conformers should be tot_mol_size * conf_per_mol

seed: random seed for the RNG

diffalign.utils.datasets.preprocess_iso17_dataset(base_path)
diffalign.utils.datasets.rdmol_cluster_to_data(mol: Mol, smiles=None)
diffalign.utils.datasets.rdmol_to_data(mol: ~rdkit.Chem.rdchem.Mol, smiles=None, data_cls=<class 'torch_geometric.data.data.Data'>)

diffalign.utils.misc module

class diffalign.utils.misc.BlackHole

Bases: object

diffalign.utils.misc.get_checkpoint_path(folder, it=None)
diffalign.utils.misc.get_logger(name, log_dir=None, log_fn='log.txt')
diffalign.utils.misc.get_new_log_dir(root='./logs', prefix='', tag='')
diffalign.utils.misc.inf_iterator(iterable)
diffalign.utils.misc.int_tuple(argstr)
diffalign.utils.misc.log_hyperparams(writer, args)
diffalign.utils.misc.repeat_batch(batch, num_repeat)
diffalign.utils.misc.repeat_data(data, num_repeat)
diffalign.utils.misc.seed_all(seed)
diffalign.utils.misc.str_tuple(argstr)

diffalign.utils.transforms module

class diffalign.utils.transforms.AddAngleDihedral

Bases: object

static iter_angle_triplet(bond_mat)
static iter_dihedral_quartet(bond_mat)
class diffalign.utils.transforms.AddEdgeLength

Bases: object

class diffalign.utils.transforms.AddEdgeName(asymmetric=True)

Bases: object

class diffalign.utils.transforms.AddHigherOrderEdges(order, num_types=22)

Bases: object

binarize(x)
get_higher_order_adj_matrix(adj, order)
Parameters:
  • adj – (N, N)

  • type_mat – (N, N)

class diffalign.utils.transforms.AddPlaceHolder

Bases: object

class diffalign.utils.transforms.CountNodesPerGraph

Bases: object

diffalign.utils.visualize module

diffalign.utils.visualize.visualize_mol(mol, size=(300, 300), surface=False, opacity=0.5)

Draw molecule in 3D

Args:

mol: rdMol, molecule to show

size: tuple(int, int), canvas size

style: str, type of drawing molecule; style can be ‘line’, ‘stick’, ‘sphere’, ‘cartoon’

surface: bool, display SAS

opacity: float, opacity of surface, range 0.0-1.0

Return:

viewer: py3Dmol.view, a class for constructing embedded 3Dmol.js views in ipython notebooks.

Module contents