Source code for deepfold.data.monomer

# Copyright 2024 DeepFold Team


import dataclasses
import functools
import os
import re
from typing import Dict, Set

import networkx as nx
import requests


@functools.lru_cache(maxsize=24)
def fetch_ligand(id: str) -> str:
    """Download a ligand (monomer) definition from the wwPDB file server.

    Args:
        id: Ligand identifier; it is upper-cased before being used in the URL.

    Returns:
        The response body as text.

    Raises:
        RuntimeError: If the server answers with any status other than 200.
    """
    ligand_id = id.upper()
    r = requests.get(f"https://files.wwpdb.org/pub/pdb/data/monomers/{ligand_id}")
    if r.status_code == 200:
        return r.text
    # Raise instead of falling through: an implicit ``None`` return would be
    # memoized by lru_cache and handed to callers that expect a string.
    raise RuntimeError(f"Cannot fetch '{ligand_id}'")
[docs] @functools.lru_cache(maxsize=24) def read_ligand( ligand_id: str, monomer_path: os.PathLike | None = None, ) -> str: ligand_id = ligand_id.upper() monomer_path = "." if monomer_path is None else monomer_path ligand_path = os.path.join(monomer_path, ligand_id) if monomer_path is not None and os.path.exists(ligand_path): with open(ligand_path, "r") as fp: return fp.read() else: return fetch_ligand(ligand_id)
@dataclasses.dataclass
class Ligand:
    """A ligand identifier, its name, and a graph connecting its atom names."""

    # Ligand identifier.
    id: str = ""
    # Ligand name.
    name: str = ""
    # Graph whose nodes are atom names. default_factory builds a fresh graph
    # per instance; a plain ``default=nx.Graph()`` would be created once and
    # shared by every Ligand constructed without an explicit graph.
    graph: nx.Graph = dataclasses.field(default_factory=nx.Graph)
@functools.lru_cache(maxsize=24)
def get_ligand(
    ligand_id: str,
    noh: bool = False,
    monomer_path: os.PathLike | None = None,
) -> Ligand:
    """Parse a ligand definition into a ``Ligand`` with a connectivity graph.

    The text obtained from ``read_ligand`` is scanned line by line; each line
    is split on whitespace and dispatched on its first token:

    * ``HET``    - the first field replaces the ligand identifier.
    * ``HETNAM`` - the second field becomes the ligand name.
    * ``CONECT`` - the first field is an atom, the second the number of
      neighbours, and that many following fields are the neighbours; one
      edge is added per neighbour.

    All other lines are ignored.

    Args:
        ligand_id: Identifier passed to ``read_ligand``.
        noh: When true, atoms whose name starts with ``"H"`` (presumably
            hydrogens) are left out of the graph.
        monomer_path: Forwarded to ``read_ligand``.

    Returns:
        The parsed ligand. Results are memoized by ``lru_cache``, so repeated
        calls with equal arguments return the same object.
    """

    def _skipped(atom: str) -> bool:
        # Only atoms named H* are dropped, and only when ``noh`` is set.
        return noh and atom.startswith("H")

    bonds = nx.Graph()
    display_name = ""
    for raw_line in read_ligand(ligand_id, monomer_path=monomer_path).split("\n"):
        tokens = raw_line.split()
        if not tokens:
            continue
        record, *fields = tokens
        if record == "HET":
            ligand_id = fields[0]
        elif record == "HETNAM":
            display_name = fields[1]
        elif record == "CONECT":
            center = fields[0]
            if _skipped(center):
                continue
            # fields[1] is the neighbour count; the neighbours start at index 2.
            stop = 2 + int(fields[1])
            bonds.add_edges_from(
                (center, other) for other in fields[2:stop] if not _skipped(other)
            )
    return Ligand(id=ligand_id, name=display_name, graph=bonds)
@dataclasses.dataclass(frozen=True)
class AtomMap:
    """Immutable pairing of an atom-name mapping with a set of atom names."""

    # Atom name -> atom name.
    mapping: Dict[str, str]
    # Atom names that have no entry in ``mapping``.
    removed: Set[str]
@functools.lru_cache(maxsize=128)
def build_atom_map(can: str, mod: str) -> AtomMap:
    """Map atom names of a modified residue onto its canonical residue.

    Both ligands are loaded without ``H*`` atoms. The modified graph is
    reduced to the atoms within a per-residue number of bonds from ``"CA"``,
    the largest common subgraphs with the canonical graph are enumerated
    with ISMAGS, and the best-scoring one (see below) supplies the mapping.

    Args:
        can: Identifier of the canonical residue; must be one of the twenty
            keys of the cutoff table below (case-insensitive).
        mod: Identifier of the modified residue.

    Returns:
        An ``AtomMap`` where ``mapping`` goes from modified atom name to
        canonical atom name, and ``removed`` holds the atoms of the pruned
        modified graph that received no mapping. Atoms pruned for being too
        far from ``"CA"`` are not listed in ``removed``.

    Raises:
        KeyError: If ``can`` is not in the cutoff table.
        IndexError: If no common subgraph contains ``"CA"``.
    """
    can_lig = get_ligand(can, noh=True)  # Canonical
    mod_lig = get_ligand(mod, noh=True)  # Modified

    cutoff = {
        "ALA": 2,
        "ARG": 6,
        "ASN": 3,
        "ASP": 3,
        "CYS": 2,
        "GLN": 4,
        "GLU": 4,
        "GLY": 2,
        "HIS": 4,
        "ILE": 3,
        "LEU": 3,
        "LYS": 5,
        "MET": 4,
        "PHE": 5,
        "PRO": 2,
        "SER": 2,
        "THR": 2,
        "TRP": 6,
        "TYR": 6,
        "VAL": 2,
    }
    # How far from CA? Upper-cased to match how the ligand readers normalize ids.
    max_dist = cutoff[can.upper()]

    # get_ligand is memoized, so mod_lig is shared with every other caller.
    # Prune a private copy; mutating mod_lig.graph would corrupt the cache.
    mod_graph = mod_lig.graph.copy()
    distances = nx.single_source_shortest_path_length(mod_graph, "CA")
    mod_graph.remove_nodes_from(
        [node for node, dist in distances.items() if dist > max_dist]
    )

    # Heuristic: reward identical atom names, heavily penalize pairs whose
    # names differ in the first character (presumably the element symbol).
    ismags = nx.isomorphism.ISMAGS(can_lig.graph, mod_graph)
    best = None
    best_score = 0
    for sub in list(ismags.largest_common_subgraph()):
        if "CA" not in sub:
            continue
        score = 0
        for can_atom, mod_atom in sub.items():
            if can_atom == mod_atom:
                score += 10
            elif can_atom[0] != mod_atom[0]:
                score -= 100
        # Strict comparison keeps the first of equally scored candidates.
        if best is None or score > best_score:
            best, best_score = sub, score

    if best is None:
        # Same exception type the bare list indexing used to produce.
        raise IndexError(
            f"No common subgraph containing 'CA' between '{can}' and '{mod}'"
        )

    # Keep only pairs that agree in the first character of the atom name.
    mapping = {
        mod_atom: can_atom
        for can_atom, mod_atom in best.items()
        if can_atom[0] == mod_atom[0]
    }
    removed = set(mod_graph.nodes).difference(mapping.keys())

    return AtomMap(mapping=mapping, removed=removed)