import torch


def parse_segment(line_segment, query_ID, device):
    # Strip surrounding whitespace/newlines and drop blank lines.
    line_segment = [line.strip() for line in line_segment]
    line_segment = list(filter(None, line_segment))

    # line_segment[0] is the "No N" marker; [1] names the target hit
    # (e.g. ">d1abc_ ..."); [2] carries the match statistics.
    target_info = line_segment[1]
    target_ID = target_info.split()[0].replace(">", "")
    match_info = line_segment[2]
    line_segment = line_segment[3:]

    query_seq = ""
    target_seq = ""
    confidence = ""
    for line in line_segment:
        # The first Q/T line of the block carries the 1-based start
        # position of the aligned region; convert it to 0-based.
        if query_seq == "" and f"Q {query_ID}" in line:
            query_start = int(line.split()[2]) - 1
        if target_seq == "" and f"T {target_ID}" in line:
            target_start = int(line.split()[2]) - 1
        if f"Q {query_ID}" in line:
            query_seq += line.split()[-3]
        elif f"T {target_ID}" in line:
            target_seq += line.split()[-3]
        elif "Confidence" in line:
            # The per-column confidence digits are assumed to start at
            # column 22 of the "Confidence" row.
            confidence += line[22:]

    # Blank confidence positions (e.g. under gap columns) map to '0'.
    confidence = confidence.replace(" ", "0")

    query_idx = query_start
    target_idx = target_start
    query_map_idx = []
    target_map_idx = []
    confidence_tensor = []
    # Iterate through the aligned sequences to create the mapping.
    for q_char, t_char, conf in zip(query_seq, target_seq, confidence):
        if q_char != '-' and t_char != '-':
            query_map_idx.append(query_idx)
            target_map_idx.append(target_idx)
            confidence_tensor.append(float(conf) / 10)
            query_idx += 1
            target_idx += 1
        elif q_char == '-':
            target_idx += 1
        elif t_char == '-':
            query_idx += 1

    # Convert the mappings to tensors.
    query_map_idx = torch.tensor(query_map_idx, device=device)
    target_map_idx = torch.tensor(target_map_idx, device=device)
    confidence = torch.tensor(confidence_tensor, device=device)
    return {
        "target_ID": target_ID,
        "query_map_idx": query_map_idx,
        "target_map_idx": target_map_idx,
        "confidence": confidence,
    }
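# A minimal sketch of the alignment block parse_segment expects. Everything
# below -- the hit ID "d1abc_", the query ID "query1", the coordinates and
# the confidence digits -- is made up for illustration; real .hhr blocks also
# carry "Q Consensus"/"T Consensus" rows, which the ID checks above skip.
example_segment = [
    "No 1\n",
    ">d1abc_ hypothetical target domain\n",
    "Probab=99.00  E-value=1e-30  Score=100.00\n",
    "Q query1            1 MKTAYIAK    8 (100)\n",
    "T d1abc_            5 MKTA-IAK   11 (150)\n",
    # The confidence digits begin at column 22, where line[22:] slices.
    "Confidence            4567 899\n",
]
hit = parse_segment(example_segment, query_ID="query1", device=torch.device("cpu"))
# hit["target_ID"]      == "d1abc_"
# hit["query_map_idx"]  == tensor([0, 1, 2, 3, 5, 6, 7])  (query pos 4 faces a gap)
# hit["target_map_idx"] == tensor([4, 5, 6, 7, 8, 9, 10])
# hit["confidence"]     == tensor([0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.9])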
def hhpred_parser(hhpred_hhr_file, query_ID, device=torch.device('cpu')):
    with open(hhpred_hhr_file, 'r') as f:
        lines = f.readlines()

    # Split the file into per-hit segments; each alignment block in an
    # .hhr file starts with a "No N" line.
    line_seperated = []
    line_segment = []
    for line in lines:
        if line.startswith('No '):
            line_seperated.append(line_segment)
            line_segment = []
        line_segment.append(line)
    # Append the final block, which is not followed by another "No " marker.
    line_seperated.append(line_segment)

    # The first segment is the file header, not an alignment; drop it.
    line_seperated = line_seperated[1:]

    parsed_info_list = []
    for line_segment in line_seperated:
        parsed_info = parse_segment(line_segment, query_ID, device)
        parsed_info_list.append(parsed_info)
    return parsed_info_list
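# Hypothetical usage sketch: "results.hhr" and "query1" are placeholders for
# a real HHpred output file and the query ID that appears in its alignment
# rows. Each returned dict pairs aligned query/target residue indices with a
# per-column confidence in [0.0, 0.9].
if __name__ == "__main__":
    hits = hhpred_parser("results.hhr", query_ID="query1")
    for hit in hits:
        # Summarise each hit: how many residues aligned, and how confidently.
        n_aligned = hit["query_map_idx"].numel()
        mean_conf = hit["confidence"].mean().item() if n_aligned else 0.0
        print(f'{hit["target_ID"]}: {n_aligned} aligned residues, '
              f'mean confidence {mean_conf:.2f}')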