#!/usr/bin/env python
import numpy as np
from pathlib import Path
PDBfmt = '%-6s%5d %-4s %3s %1s%5s %8.3f%8.3f%8.3f\n'
[docs]
class PDB:
def __init__(self, pdb_fn, read=True, read_het=True):
self.pdb_fn = pdb_fn
self.model_s = []
self.use_model = False
if read: self.read(read_het=read_het)
def __repr__(self):
return self.pdb_fn
def __len__(self):
return len(self.model_s)
def __getitem__(self, i):
return self.model_s[i]
[docs]
def read(self, read_het=True):
read_model = True
self.model_s = []
i_model = 0
resNo_prev = None
chain_prev = None
model = Model()
self.model_s.append(model)
#
fp = open("%s"%self.pdb_fn)
lines = fp.readlines()
fp.close()
#
for line in lines:
if line.startswith("MODEL"):
self.use_model = True
model_no = int(line.strip().split()[1])
if len(model) != 0 and model.use:
i_model += 1
if len(model) != 0 and model.use:
model = Model(model_no=model_no)
self.model_s.append(model)
else:
model.model_no = model_no
model.append(PDBline(line))
elif line.startswith("ATOM") or line.startswith("HETATM"):
if line[16] not in ['A', ' ']:
continue
if line.startswith("HETATM") and (not read_het):
continue
#
model.use = True
#
resNo = line[22:27]
chain_id = line[21]
#
if resNo != resNo_prev or chain_id != chain_prev:
resNo_prev = resNo
chain_prev = chain_id
#
residue = Residue(line)
model.append(residue)
residue.append(line)
elif read_model:
model.append(PDBline(line))
[docs]
def write(self, exclude_remark=True, exclude_symm=False, exclude_missing_bb=True, model_index=[],\
remark_s=[]):
model_index = range(len(self))
#
wrt = []
wrt.extend(remark_s)
#
model_no = 0
for j in model_index:
if self.use_model:
model_no += 1
wrt.append("MODEL %4d\n"%(model_no))
wrt.extend(self.model_s[j].write(exclude_remark=exclude_remark, exclude_symm=exclude_symm,\
exclude_missing_bb=exclude_missing_bb))
if self.use_model:
wrt.append("ENDMDL\n")
wrt.append("END\n")
return wrt
[docs]
class Model:
def __init__(self, model_no=0):
self.use = False
self.model_no = model_no
self.lines = []
self.res_index = {}
[docs]
def append(self, X):
self.lines.append(X)
if X.isResidue():
self.res_index[(X.chainID(), X.resNo_char())] = len(self.lines)-1
[docs]
def index(self, key):
return self.res_index[key]
def __getitem__(self, i):
return self.lines[i]
def __len__(self):
return len(self.get_residues())
[docs]
def write(self, exclude_remark=False, exclude_symm=False, exclude_missing_bb=False,\
exclude_nucl=False, exclude_SSbond=False, remark_s=[], chain_id=None):
wrt = []
wrt.extend(remark_s)
for line in self.lines:
if line.isResidue():
if chain_id != None and chain_id != line.chainID():
continue
if exclude_nucl and line.isAtom() and \
(line.resName().strip() in ['DA','DC','DG','DT','DU','A','C','G','T','U']):
continue
if exclude_missing_bb and (not line.check_bb()):
continue
wrt.append('%s'%line)
else:
if line.startswith("MODEL"):
continue
elif line.startswith("END"):
continue
elif line.startswith("TER"):
if len(wrt) != 0 and (not wrt[-1].startswith("TER")):
wrt.append("TER\n")
continue
if line.startswith('REMARK 350') and (not exclude_symm):
wrt.append('%s'%line)
continue
elif line.startswith('SSBOND') and (not exclude_SSbond):
wrt.append('%s'%line)
continue
elif exclude_remark:
continue
wrt.append('%s'%line)
return wrt
def __repr__(self):
return ''.join(self.write())
[docs]
def get_residues(self, res_range=[]):
lines = []
for line in self.lines:
if not line.isResidue():
continue
if len(res_range) != 0 and \
(line.resNo() not in res_range) and \
(line.resNo_char() not in res_range):
continue
lines.append(line)
return lines
[docs]
def get_residue_lines(self, res_range=[]):
lines = []
for line in self.get_residues(res_range=res_range):
lines.append('%s'%line)
return lines
[docs]
class Residue:
def __init__(self, line):
self._diso = False
self._header = line[:6]
self._resName = line[17:20]
self._resNo = line[22:27]
self._chainID = line[21]
#
self._R = []
self._i_atm = []
self._atmName = []
def __len__(self):
return len(self._R)
[docs]
def append(self, line):
atmName = line[12:16].strip()
if len(atmName) == 4:
atmName = '%s%s'%(atmName[1:], atmName[0])
self._atmName.append(atmName)
self._i_atm.append(int(line[6:11]))
self._R.append((float(line[30:38]),\
float(line[38:46]),\
float(line[46:54])))
[docs]
def isResidue(self):
return True
[docs]
def isAtom(self):
return self._header[:4] == 'ATOM'
[docs]
def isHetatm(self):
return self._header == 'HETATM'
[docs]
def exists(self, atmName):
return atmName in self._atmName
[docs]
def check_bb(self):
stat = [False, False, False, False]
if 'N' in self._atmName:
stat[0] = True
if 'CA' in self._atmName:
stat[1] = True
if 'C' in self._atmName:
stat[2] = True
if 'O' in self._atmName:
stat[3] = True
#
if False in stat and not self._header == 'HETATM':
return False
else:
return True
[docs]
def write(self):
wrt = []
if not self._diso:
for i in range(len(self._R)):
wrt.append(self[i])
return ''.join(wrt)
def __repr__(self):
return self.write()
def __getitem__(self, i):
if isinstance(i, str):
i = self.atmIndex(i)
if len(self._atmName[i]) == 4:
atmName = '%s%s'%(self._atmName[i][-1],self._atmName[i][:3])
else:
atmName = ' %s'%self._atmName[i]
line = PDBfmt%(self._header, self._i_atm[i], atmName,\
self._resName, self._chainID, self._resNo,\
self._R[i][0], self._R[i][1], self._R[i][2])
return line
[docs]
def resName(self):
return self._resName
[docs]
def resNo(self):
return int(self._resNo[:4])
[docs]
def resNo_char(self):
return self._resNo
[docs]
def chainID(self):
return self._chainID
[docs]
def atmName(self):
return self._atmName
[docs]
def i_atm(self, atmName=None, atmIndex=None):
if atmName != None:
return self._i_atm[self.atmIndex(atmName)]
elif atmIndex != None:
return self._i_atm[atmIndex]
else:
return self._i_atm
[docs]
def R(self, atmName=None, atmIndex=None):
if atmName != None:
return self._R[self.atmIndex(atmName)]
elif atmIndex != None:
return self._R[atmIndex]
else:
return self._R
[docs]
def atmIndex(self, atmName):
return self._atmName.index(atmName)
[docs]
def get_backbone(self):
return [self._atmName.index("N"), self._atmName.index("CA"),\
self._atmName.index("C"), self._atmName.index("O")]
[docs]
def get_heavy(self):
heavy = []
for i,atm in enumerate(self._atmName):
if atm[0] != 'H':
heavy.append(i)
return heavy
[docs]
def get_sc(self):
sc = []
for i,atm in enumerate(self._atmName):
if atm in ["N", "CA", "C", "O"]:
continue
if atm[0] == 'H':
continue
sc.append(i)
return sc
[docs]
def get_CB(self):
if self._resName == 'GLY':
return [self._atmName.index("CA")]
else:
return [self._atmName.index("CB")]
[docs]
class Atom(Residue):
def __init__(self, line):
self._header = line[:6]
self._resName = line[17:20]
self._resNo = line[22:27]
self._chainID = line[21]
#
atmName = line[12:16].strip()
if len(atmName) == 4:
atmName = '%s%s'%(atmName[1:], atmName[0])
self._atmName = atmName
self._i_atm = int(line[6:11])
self._R = np.array((float(line[30:38]),\
float(line[38:46]),\
float(line[46:54])))
def __repr__(self):
if len(self._atmName) == 4:
atmName = '%s%s'%(self._atmName[-1],self._atmName[:3])
else:
atmName = ' %s'%self._atmName
line = PDBfmt%(self._header, self._i_atm, atmName,\
self._resName, self._chainID, self._resNo,\
self._R[0], self._R[1], self._R[2])
return line
[docs]
def R(self):
return self._R
[docs]
def i_atm(self):
return self._i_atm
[docs]
def atmName(self):
return self._atmName
[docs]
class PDBline:
def __init__(self, line):
self.line = line
def __repr__(self):
return self.line
[docs]
def isResidue(self):
return False
[docs]
def isAtom(self):
return False
[docs]
def isHetatm(self):
return False
[docs]
def startswith(self, key):
return self.line.startswith(key)
###############################################################################################################################
###############################################################################################################################
###############################################################################################################################
[docs]
class MOL2:
def __init__(self, mol2_fn):
self.mol2_fn = Path(mol2_fn)
self.model_s = []
self.is_model = False
def __repr__(self):
return self.mol2_fn
def __len__(self):
return len(self.model_s)
def __getitem__(self, i):
return self.model_s[i]
[docs]
def read(self, read_end = None, read_hydrogen=False):
self.model_s = []
i_model = 0
read_atom = False
read_bond = False
lines = self.mol2_fn.read_text().splitlines()
for line in lines:
if line.startswith('@<TRIPOS>MOLECULE'):
self.is_model = True
i_model += 1
if read_end and i_model > read_end:
break
model = MOL2_UNIT(model_no=i_model)
self.model_s.append(model)
read_atom = False
read_bond = False
elif line.startswith('@<TRIPOS>ATOM'):
read_atom = True
elif line.startswith('@<TRIPOS>BOND'):
read_atom = False
read_bond = True
elif line.startswith('@<TRIPOS>SUBSTRUCTURE'):
read_bond = False
elif line.startswith('@<TRIPOS>'):
read_atom = False
read_bond = False
elif read_atom:
try:
atom_id, _, x_crd, y_crd, z_crd, mol2_type = line.split()[:6]
except ValueError:
print(str(self.mol2_fn))
print(line)
exit()
if mol2_type[0] == 'H':
model.add_hydrogen_index(int(atom_id))
if not read_hydrogen:
continue
model.append_atom_index(int(atom_id))
model.append_atom_mol2_type(mol2_type)
model.append_coordinates([float(x_crd),float(y_crd),float(z_crd)])
elif read_bond:
splitted = line.split()
if len(splitted) < 4:
read_bond = False
continue
start = int(splitted[1])
end = int(splitted[2])
if start in model.get_hydrogen_set() or end in model.get_hydrogen_set():
if not read_hydrogen:
continue
model.update_bond(start,end,splitted[3])
try:
model.read_line(line)
except:
continue
return
[docs]
def write(self, model_index_start = 0, model_index_end = None):
if model_index_end == None:
model_index_end = len(self.model_s)
model_index = range(model_index_start, model_index_end)
out_lines = []
for i in model_index:
model_i_lines = self.model_s[i].write()
out_lines.append(model_i_lines)
out_lines = '\n'.join(out_lines)
return out_lines
[docs]
class MOL2_UNIT:
def __init__(self, model_no):
self.model_no = model_no
self.line_list = []
self.hydrogen_index_set = set()
self.atom_index_list = []
self.atom_mol2_type_list = []
self.bond_dict = {}
self.crd_list = []
def __getitem__(self, i):
return self.atom_idx_list[i]
def __len__(self):
return len(self.atom_idx_list)
[docs]
def read_line(self,line):
self.line_list.append(line)
return
[docs]
def add_hydrogen_index(self, index):
self.hydrogen_index_set.add(index)
return
[docs]
def append_atom_index(self,index):
self.atom_index_list.append(index)
return
[docs]
def append_atom_mol2_type(self,mol2_type):
self.atom_mol2_type_list.append(mol2_type)
return
[docs]
def update_bond(self,start,end,bond_type):
self.bond_dict[start,end] = bond_type
self.bond_dict[end,start] = bond_type
return
[docs]
def append_coordinates(self,tmp_crd_list):
self.crd_list.append(tmp_crd_list)
return
[docs]
def get_hydrogen_set(self):
return self.hydrogen_index_set
[docs]
def get_atom_index_list(self):
return self.atom_index_list
[docs]
def get_atom_mol2_type_list(self):
return self.atom_mol2_type_list
[docs]
def get_bond_dict(self):
return self.bond_dict
[docs]
def get_coordinates_np_array(self):
return np.array(self.crd_list,dtype=np.float32)
[docs]
def write(self):
out_lines = '\n'.join(self.line_list)
return out_lines
def __repr__(self):
return ''.join(self.write())