diffalign.utils package

Subpackages

Submodules

diffalign.utils.chem module

diffalign.utils.chem.bond_type_to_int(bond)[source]
diffalign.utils.chem.draw_mol_svg(mol, molSize=(450, 150), kekulize=False)[source]
diffalign.utils.chem.get_2D_mol(mol)[source]
diffalign.utils.chem.get_atom_symbol(atomic_number)[source]
diffalign.utils.chem.get_atoms_in_ring(mol)[source]
diffalign.utils.chem.get_best_rmsd(probe, ref)[source]
diffalign.utils.chem.mol_to_graph_data_obj(mol)[source]
diffalign.utils.chem.mol_to_smiles(mol: Mol) str[source]
diffalign.utils.chem.mol_to_smiles_without_Hs(mol: Mol) str[source]
diffalign.utils.chem.remove_duplicate_mols(molecules: List[Mol]) List[Mol][source]
diffalign.utils.chem.set_conformer_positions(conf, pos)[source]
diffalign.utils.chem.set_rdmol_positions(rdkit_mol, pos)[source]
Parameters:
  • rdkit_mol – An rdkit.Chem.rdchem.Mol object.

  • pos – (N_atoms, 3)

diffalign.utils.chem.set_rdmol_positions_(mol, pos)[source]
Parameters:
  • mol – An rdkit.Chem.rdchem.Mol object.

  • pos – (N_atoms, 3)

diffalign.utils.chem.update_data_pos_from_rdmol(data)[source]
diffalign.utils.chem.update_data_rdmol_positions(data)[source]

diffalign.utils.common module

class diffalign.utils.common.ExponentialLR_with_minLr(optimizer, gamma, min_lr=0.0001, last_epoch=-1, verbose=False)[source]

Bases: ExponentialLR

get_lr()[source]
diffalign.utils.common.get_optimizer(cfg, model)[source]
diffalign.utils.common.get_scheduler(cfg, optimizer)[source]
diffalign.utils.common.repeat_batch(batch: Batch, num_repeat) Batch[source]
diffalign.utils.common.repeat_data(data: Data, num_repeat) Batch[source]

diffalign.utils.dataloader module

class diffalign.utils.dataloader.Collater(dataset: Dataset | Sequence[BaseData] | DatasetAdapter, follow_batch: List[str] | None = None, exclude_keys: List[str] | None = None)[source]

Bases: object

class diffalign.utils.dataloader.DataLoader(dataset: Dataset | Sequence[BaseData] | DatasetAdapter, batch_size: int = 1, shuffle: bool = False, follow_batch: List[str] | None = None, exclude_keys: List[str] | None = None, **kwargs)[source]

Bases: DataLoader

diffalign.utils.datasets module

class diffalign.utils.datasets.ConformationDataset(path, transform=None)[source]

Bases: Dataset

get(idx)[source]

Gets the data object at index idx.

len()[source]

Returns the number of data objects stored in the dataset.

class diffalign.utils.datasets.MolClusterData(x: Tensor | None = None, edge_index: Tensor | None = None, edge_attr: Tensor | None = None, y: Tensor | int | float | None = None, pos: Tensor | None = None, time: Tensor | None = None, **kwargs)[source]

Bases: Data

class diffalign.utils.datasets.PackedConformationDataset(path, transform=None)[source]

Bases: ConformationDataset

class diffalign.utils.datasets.SidechainConformationDataset(path, transform=None, cutoff=10.0, max_residue=5000, fix_subgraph=False)[source]

Bases: ConformationDataset

static collate_fn(data)[source]
diffalign.utils.datasets.accumulate_grad_from_subgraph(model, atom_type, pos, bond_index, bond_type, batch, atom2res, batch_size=8, device='cuda:0', is_sidechain=None, is_alpha=None, pos_gt=None, cutoff=10.0, max_residue=5000, transform=None)[source]
  1. decompose the protein to subgraphs

  2. evaluate subgraphs using trained models

  3. accumulate atom-wise grads

  4. return grads

diffalign.utils.datasets.get_test_set_with_large_num_conf(base_path, dataset_name, block, tot_mol_size=1000, seed=None, confmin=50, confmax=500)[source]

base_path: directory that contains the GEOM dataset.
dataset_name: dataset name; should be one of [qm9, drugs].
conf_per_mol: keep only molecules that have at least conf_per_mol conformers, and sample the most probable conf_per_mol conformers.
train_size: training-set ratio; val = test = (1 - train_size) / 2.
tot_mol_size: maximum number of molecules. The total number of final conformers should be tot_mol_size * conf_per_mol.
seed: random seed for the RNG.

diffalign.utils.datasets.pdb_to_data(pdb_path, name)[source]
diffalign.utils.datasets.prepare_pdb2(scn_dir, data_path)[source]
diffalign.utils.datasets.prepare_pdb_valtest(scn_dir, data_path)[source]
diffalign.utils.datasets.prepare_pdblarge(scn_dir, data_path)[source]
diffalign.utils.datasets.preprocess_GEOM_dataset(base_path, dataset_name, max_conf=5, train_size=0.8, max_size=9999999999, seed=None)[source]
diffalign.utils.datasets.preprocess_GEOM_dataset_with_fixed_num_conf(base_path, dataset_name, conf_per_mol=5, train_size=0.8, tot_mol_size=50000, seed=None)[source]

base_path: directory that contains the GEOM dataset.
dataset_name: dataset name; should be one of [qm9, drugs].
conf_per_mol: keep only molecules that have at least conf_per_mol conformers, and sample the most probable conf_per_mol conformers.
train_size: training-set ratio; val = test = (1 - train_size) / 2.
tot_mol_size: maximum number of molecules. The total number of final conformers should be tot_mol_size * conf_per_mol.
seed: random seed for the RNG.

diffalign.utils.datasets.preprocess_iso17_dataset(base_path)[source]
diffalign.utils.datasets.rdmol_cluster_to_data(mol: Mol, smiles=None)[source]
diffalign.utils.datasets.rdmol_to_data(mol: ~rdkit.Chem.rdchem.Mol, smiles=None, data_cls=<class 'torch_geometric.data.data.Data'>)[source]

diffalign.utils.misc module

class diffalign.utils.misc.BlackHole[source]

Bases: object

diffalign.utils.misc.get_checkpoint_path(folder, it=None)[source]
diffalign.utils.misc.get_logger(name, log_dir=None, log_fn='log.txt')[source]
diffalign.utils.misc.get_new_log_dir(root='./logs', prefix='', tag='')[source]
diffalign.utils.misc.inf_iterator(iterable)[source]
diffalign.utils.misc.int_tuple(argstr)[source]
diffalign.utils.misc.log_hyperparams(writer, args)[source]
diffalign.utils.misc.repeat_batch(batch, num_repeat)[source]
diffalign.utils.misc.repeat_data(data, num_repeat)[source]
diffalign.utils.misc.seed_all(seed)[source]
diffalign.utils.misc.str_tuple(argstr)[source]

diffalign.utils.transforms module

class diffalign.utils.transforms.AddAngleDihedral[source]

Bases: object

static iter_angle_triplet(bond_mat)[source]
static iter_dihedral_quartet(bond_mat)[source]
class diffalign.utils.transforms.AddEdgeLength[source]

Bases: object

class diffalign.utils.transforms.AddEdgeName(asymmetric=True)[source]

Bases: object

class diffalign.utils.transforms.AddHigherOrderEdges(order, num_types=22)[source]

Bases: object

binarize(x)[source]
get_higher_order_adj_matrix(adj, order)[source]
Parameters:
  • adj – (N, N)

  • type_mat – (N, N)

class diffalign.utils.transforms.AddPlaceHolder[source]

Bases: object

class diffalign.utils.transforms.CountNodesPerGraph[source]

Bases: object

diffalign.utils.visualize module

diffalign.utils.visualize.visualize_mol(mol, size=(300, 300), surface=False, opacity=0.5)[source]

Draw molecule in 3D

Args:

mol: rdMol, molecule to show
size: tuple(int, int), canvas size
style: str, type of drawing molecule

style can be ‘line’, ‘stick’, ‘sphere’, ‘cartoon’

surface: bool, display SAS
opacity: float, opacity of surface, range 0.0-1.0

Return:

viewer: py3Dmol.view, a class for constructing embedded 3Dmol.js views in ipython notebooks.

Module contents