Source code for deepfold.data.tools.hmmsearch

# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#            http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A Python wrapper for hmmsearch - search profile against a sequence db."""

import logging
import os
import subprocess
from typing import Optional, Sequence

from deepfold.data.search import parsers
from deepfold.data.tools import hmmbuild, utils

logger = logging.getLogger(__name__)


[docs] class Hmmsearch(object): """Python wrapper of the hmmsearch binary.""" def __init__( self, *, binary_path: str, hmmbuild_binary_path: str, database_path: str, flags: Optional[Sequence[str]] = None, ): """Initializes the Python hmmsearch wrapper. Args: binary_path: The path to the hmmsearch executable. hmmbuild_binary_path: The path to the hmmbuild executable. Used to build an hmm from an input a3m. database_path: The path to the hmmsearch database (FASTA format). flags: List of flags to be used by hmmsearch. Raises: RuntimeError: If hmmsearch binary not found within the path. """ self.binary_path = binary_path self.hmmbuild_runner = hmmbuild.Hmmbuild(binary_path=hmmbuild_binary_path) self.database_path = database_path if flags is None: # Default hmmsearch run settings. flags = [ "--F1", "0.1", "--F2", "0.1", "--F3", "0.1", "--incE", "100", "-E", "100", "--domE", "100", "--incdomE", "100", ] self.flags = flags if not os.path.exists(self.database_path): logger.error("Could not find hmmsearch database %s", database_path) raise ValueError(f"Could not find hmmsearch database {database_path}") @property def output_format(self) -> str: return "sto" @property def input_format(self) -> str: return "sto"
[docs] def query(self, msa_sto: str, output_dir: Optional[str] = None) -> str: """Queries the database using hmmsearch using a given stockholm msa.""" hmm = self.hmmbuild_runner.build_profile_from_sto(msa_sto, model_construction="hand") return self.query_with_hmm(hmm, output_dir)
[docs] def query_with_hmm(self, hmm: str, output_dir: Optional[str] = None) -> str: """Queries the database using hmmsearch using a given hmm.""" with utils.tmpdir_manager() as query_tmp_dir: hmm_input_path = os.path.join(query_tmp_dir, "query.hmm") output_dir = query_tmp_dir if output_dir is None else output_dir out_path = os.path.join(output_dir, "hmm_output.sto") with open(hmm_input_path, "w") as f: f.write(hmm) cmd = [ self.binary_path, "--noali", "--cpu", "8", ] # Don't include the alignment in stdout. # If adding flags, we have to do so before the output and input: if self.flags: cmd.extend(self.flags) cmd.extend( [ "-A", out_path, hmm_input_path, self.database_path, ] ) logger.info("Launching sub-process %s", cmd) process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) with utils.timing(f"hmmsearch ({os.path.basename(self.database_path)}) query"): stdout, stderr = process.communicate() retcode = process.wait() if retcode: raise RuntimeError("hmmsearch failed:\nstdout:\n%s\n\nstderr:\n%s\n" % (stdout.decode("utf-8"), stderr.decode("utf-8"))) with open(out_path) as f: out_msa = f.read() return out_msa
[docs] @staticmethod def get_template_hits(output_string: str, input_sequence: str) -> Sequence[parsers.TemplateHit]: """Gets parsed template hits from the raw string output by the tool.""" template_hits = parsers.parse_hmmsearch_sto( output_string, input_sequence, ) return template_hits