"""Output of PDB files."""
from typing import Iterable
from Bio.Data.IUPACData import atom_weights
from Bio.PDB.StructureBuilder import StructureBuilder
_ATOM_FORMAT_STRING = "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%6.2f %4s%2s%2s\n"
_TER_FORMAT_STRING = "TER %5i %3s %c%4i%c \n"
_REMARK_FORMAT_STRING = "REMARK {:<70s}\n"
[docs]
class IO:
"""Base class."""
def __init__(self) -> None:
pass
[docs]
def set_structure(self, pdb_object) -> None:
"""Set structures. Precedence Structures are overrideds."""
if pdb_object.level == "S":
structure = pdb_object
else: # Not a Structure
sb = StructureBuilder()
sb.init_structure("")
sb.init_seg(" ")
if pdb_object.level == "M":
sb.structure.add(pdb_object.copy())
self.structure = sb.structure
else: # Not a Model
sb.init_model(0)
if pdb_object.level == "C":
sb.structure[0].add(pdb_object.copy())
else: # Not a Chain
raise AttributeError(f"{pdb_object} is not one of Structure, Model, or Chain. ({pdb_object.level})")
structure = sb.get_structure()
self.structure = structure
[docs]
def set_chains(self, *chains) -> None:
sb = StructureBuilder()
sb.init_structure("")
sb.init_seg(" ")
sb.init_model(0)
for chain in chains:
if chain.level == "C":
sb.structure[0].add(chain.copy())
else:
AttributeError(f"{chain} is not a Chain instance. ({chain.level})")
self.structure = sb.get_structure()
[docs]
class PDBIO(IO):
"""Write a Structure object as a PDB file."""
def __init__(self, use_model_flag: int = 0) -> None:
super().__init__()
self.use_model_flag = use_model_flag
def _get_atom_line(
self,
atom,
hetfield,
segid,
atom_number,
resname,
resseq,
icode,
chain_id,
charge=" ",
) -> str:
"""Return an ATOM PDB string."""
if hetfield != " ":
record_type = "HETATM"
else:
record_type = "ATOM "
# Atom properties
# Check if the atom serial number is an integer
# Not always the case for structures built from
# mmCIF files.
try:
atom_number = int(atom_number)
except ValueError:
raise ValueError(
f"{atom_number!r} is not a number."
"Atom serial numbers must be numerical"
" If you are converting from an mmCIF"
" structure, try using"
" preserve_atom_numbering=False"
)
if atom_number > 99999:
raise ValueError(f"Atom serial number ('{atom_number}') exceeds PDB format limit.")
# Check if the element is valid, unknown (X), or blank
if atom.element:
element = atom.element.strip().upper()
if element.capitalize() not in atom_weights and element != "X":
raise ValueError(f"Unrecognised element {atom.element}")
element = element.rjust(2)
else:
element = " "
# Format atom name
# Pad if:
# - smaller than 4 characters
# AND - is not C, N, O, S, H, F, P, ..., one letter elements
# AND - first character is NOT numeric (funky hydrogen naming rules)
name = atom.fullname.strip()
if len(name) < 4 and name[:1].isalpha() and len(element.strip()) < 2:
name = " " + name
altloc = atom.altloc
x, y, z = atom.coord
# Write PDB format line
bfactor = atom.bfactor
try:
occupancy = f"{atom.occupancy:6.2f}"
except (TypeError, ValueError):
if atom.occupancy is None:
occupancy = " " * 6 # Missing occupancy is written as blank.
else:
raise ValueError(f"Invalid occupancy value: {atom.occupancy!r}") from None
args = (
record_type,
atom_number,
name,
altloc,
resname,
chain_id,
resseq,
icode,
x,
y,
z,
occupancy,
bfactor,
segid,
element,
charge,
)
return _ATOM_FORMAT_STRING % args
[docs]
def save(
self,
file,
write_end: bool = True,
preserve_atom_numbering: bool = False,
remarks: str | Iterable[str] = "",
):
"""Save structure to a file."""
if isinstance(remarks, str):
remarks = [remarks]
remarks = list(remarks)
if isinstance(file, str):
fhandle = open(file, "w")
else:
fhandle = file # TODO: Check the type
get_atom_line = self._get_atom_line
# Remarks
for remark in remarks:
fhandle.write(_REMARK_FORMAT_STRING.format(remark))
# multiple models?
if len(self.structure) > 1 or self.use_model_flag:
model_flag = 1
else:
model_flag = 0
for model in self.structure.get_list():
# Necessary for ENDMDL
# Do not write ENDMDL if no residues were written for this model
model_residues_written = 0
if not preserve_atom_numbering:
atom_number = 1
if model_flag:
fhandle.write(f"MODEL {model.serial_num}\n")
for chain in model.get_list():
chain_id = chain.id
if len(chain_id) > 1:
e = f"Chain id ('{chain_id}') exceeds PDB format limit."
raise RuntimeError(e)
# Necessary for TER
# Do not write TER if no residues were written for this chain
chain_residues_written = 0
for residue in chain.get_unpacked_list():
hetfield, resseq, icode = residue.id
resname = residue.resname
segid = residue.segid
resid = residue.id[1]
if resid > 9999:
e = f"Residue number ('{resid}') exceeds PDB format limit."
raise RuntimeError(e)
for atom in residue.get_unpacked_list():
chain_residues_written = 1
model_residues_written = 1
if preserve_atom_numbering:
atom_number = atom.serial_number
try:
s = get_atom_line(
atom,
hetfield,
segid,
atom_number,
resname,
resseq,
icode,
chain_id,
)
except Exception as err:
# catch and re-raise with more information
raise RuntimeError(f"Error when writing atom {atom.full_id}") from err
else:
fhandle.write(s)
# inconsequential if preserve_atom_numbering is True
atom_number += 1
if chain_residues_written:
fhandle.write(_TER_FORMAT_STRING % (atom_number, resname, chain_id, resseq, icode))
if model_flag and model_residues_written:
fhandle.write("ENDMDL\n")
if write_end:
fhandle.write("END \n")
if isinstance(file, str):
fhandle.close()