# Copyright 2021 AlQuraishi Laboratory
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Library to run HHsearch from Python."""

import glob
import logging
import os
import subprocess
from typing import Optional, Sequence

from deepfold.data.search import parsers
from deepfold.data.tools import utils
class HHSearch:
    """Python wrapper of the HHsearch binary."""

    def __init__(
        self,
        *,
        binary_path: str,
        databases: Sequence[str],
        n_cpu: int = 2,
        maxseq: int = 1_000_000,
    ):
        """Initializes the Python HHsearch wrapper.

        Args:
            binary_path: The path to the HHsearch executable. It is stored
                as-is; its existence is not checked here.
            databases: A sequence of HHsearch database paths. This should be
                the common prefix for the database files (i.e. up to but not
                including _hhm.ffindex etc.)
            n_cpu: The number of CPUs to use (passed as ``-cpu``).
            maxseq: The maximum number of rows in an input alignment (passed
                as ``-maxseq``). Note that this parameter is only supported
                in HHBlits version 3.1 and higher.

        Raises:
            ValueError: If no file matching ``<database_path>_*`` exists for
                one of the given database prefixes.
        """
        self.binary_path = binary_path
        self.databases = databases
        self.n_cpu = n_cpu
        self.maxseq = maxseq

        # Fail fast on a mistyped database prefix instead of at query time.
        for database_path in self.databases:
            if not glob.glob(database_path + "_*"):
                logging.error("Could not find HHsearch database %s", database_path)
                raise ValueError(f"Could not find HHsearch database {database_path}")

    @property
    def output_format(self) -> str:
        """Format identifier of the text returned by `query`."""
        return "hhr"

    @property
    def input_format(self) -> str:
        """Format identifier of the alignment text accepted by `query`."""
        return "a3m"

    def query(self, a3m: str, output_dir: Optional[str] = None) -> str:
        """Queries the database using HHsearch using a given a3m.

        Args:
            a3m: The query alignment, as the text content of an a3m file.
            output_dir: Directory in which ``hhsearch_output.hhr`` is written.
                When None, the file is written next to the query inside the
                temporary working directory. The directory is not created
                here, so it must already exist.

        Returns:
            The content of the hhr file produced by HHsearch.

        Raises:
            RuntimeError: If the HHsearch process exits with a non-zero
                return code. The message carries the captured stdout and a
                truncated stderr.
        """
        with utils.tmpdir_manager() as query_tmp_dir:
            input_path = os.path.join(query_tmp_dir, "query.a3m")
            output_dir = query_tmp_dir if output_dir is None else output_dir
            hhr_path = os.path.join(output_dir, "hhsearch_output.hhr")
            with open(input_path, "w") as f:
                f.write(a3m)

            # One "-d <prefix>" pair per configured database.
            db_cmd = []
            for db_path in self.databases:
                db_cmd.extend(("-d", db_path))
            cmd = [
                self.binary_path,
                "-i",
                input_path,
                "-o",
                hhr_path,
                "-maxseq",
                str(self.maxseq),
                "-cpu",
                str(self.n_cpu),
            ] + db_cmd

            logging.info('Launching subprocess "%s"', " ".join(cmd))
            process = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            with utils.timing("HHsearch query"):
                # communicate() drains both pipes and waits for the process
                # to exit, so returncode is set once it returns.
                stdout, stderr = process.communicate()
                retcode = process.returncode

            if retcode:
                # Stderr is truncated to prevent proto size errors in Beam.
                # The cut is made on raw bytes and may split a multi-byte
                # character, so decode leniently: a UnicodeDecodeError here
                # would hide the actual HHsearch failure.
                raise RuntimeError(
                    "HHSearch failed:\nstdout:\n%s\n\nstderr:\n%s\n"
                    % (
                        stdout.decode("utf-8", errors="replace"),
                        stderr[:100_000].decode("utf-8", errors="replace"),
                    )
                )

            # Read the result while the temporary directory is still open:
            # with output_dir=None the hhr file lives inside it.
            with open(hhr_path) as f:
                hhr = f.read()
        return hhr

    @staticmethod
    def get_template_hits(
        output_string: str, input_sequence: str
    ) -> Sequence[parsers.TemplateHit]:
        """Gets parsed template hits from the raw string output by the tool.

        Args:
            output_string: The hhr text, e.g. as returned by `query`.
            input_sequence: Ignored by this implementation.

        Returns:
            The result of `parsers.parse_hhr` applied to `output_string`.
        """
        del input_sequence  # Used by hmmsearch but not needed for hhsearch
        return parsers.parse_hhr(output_string)