Source code for miniworld.utils.hhpred_parser

import torch

def parse_segment(line_segment, query_ID, device):
    """Parse one hit section of an HHR file into a residue index mapping.

    Args:
        line_segment: Raw lines of a single hit, beginning with its
            ``No <k>`` line. The second non-blank line must be the
            ``>target ...`` header; the third non-blank line is skipped.
        query_ID: Identifier used to recognise the query rows
            (``Q <query_ID> ...``) of the alignment.
        device: Torch device on which the returned tensors are created.

    Returns:
        A dict with the keys

        * ``"target_ID"``: first token of the header line with ``>`` removed.
        * ``"query_map_idx"``: tensor of 0-based query positions for every
          alignment column in which neither sequence has a gap.
        * ``"target_map_idx"``: tensor of the matching 0-based target
          positions.
        * ``"confidence"``: tensor with the confidence digit of each of
          those columns divided by 10 (blank columns count as 0).

        Columns are consumed with ``zip``, so the mapping stops at the
        shortest of the query, target and confidence strings; a segment
        without any ``Confidence`` row therefore yields empty tensors.

    Raises:
        ValueError: If the header line is missing, or no query / target
            alignment row could be found in the segment.
    """
    # Strip only the line terminator. Trailing blanks on a "Confidence" row
    # are data (one character per alignment column); a full strip() would drop
    # them and shift the confidence of every following block.
    rows = [line.rstrip("\r\n") for line in line_segment]
    rows = [row for row in rows if row.strip()]
    if len(rows) < 2:
        raise ValueError("HHR hit segment has no '>target' header line")

    target_ID = rows[1].split()[0].replace(">", "")

    # NOTE(review): HH-suite appears to truncate names to 14 characters in
    # alignment rows - confirm. Matching on the truncated prefix is a superset
    # of matching on the full ID, so short IDs behave exactly as before.
    name_width = 14
    # Column at which the per-residue string of a "Confidence" row begins.
    seq_column = 22
    query_tag = f"Q {str(query_ID)[:name_width]}"
    target_tag = f"T {str(target_ID)[:name_width]}"

    query_chunks = []
    target_chunks = []
    conf_chunks = []
    query_start = None
    target_start = None
    block_width = 0  # width of the most recent query chunk

    # rows[0] is the "No k" line, rows[1] the header, rows[2] is skipped.
    for row in rows[3:]:
        is_query = query_tag in row
        is_target = target_tag in row

        # The first matching row of each kind carries the 1-based start index.
        if is_query and not query_chunks:
            query_start = int(row.split()[2]) - 1
        if is_target and not target_chunks:
            target_start = int(row.split()[2]) - 1

        if is_query:
            chunk = row.split()[-3]
            query_chunks.append(chunk)
            block_width = len(chunk)
        elif is_target:
            target_chunks.append(row.split()[-3])
        elif "Confidence" in row:
            # Pad to the block width so that blanks lost to whitespace
            # trimming cannot misalign the blocks that follow.
            conf_chunks.append(row[seq_column:].ljust(block_width))

    if query_start is None:
        raise ValueError(
            f"no alignment row for query {query_ID!r} in hit {target_ID!r}"
        )
    if target_start is None:
        raise ValueError(f"no alignment row for target {target_ID!r}")

    query_seq = "".join(query_chunks)
    target_seq = "".join(target_chunks)
    # A blank confidence character is treated as confidence 0.
    confidence = "".join(conf_chunks).replace(" ", "0")

    query_idx = query_start
    target_idx = target_start
    query_map_idx = []
    target_map_idx = []
    confidence_values = []

    # Walk the alignment column by column, advancing each sequence index
    # only when that sequence actually has a residue in the column.
    for q_char, t_char, conf in zip(query_seq, target_seq, confidence):
        if q_char != "-" and t_char != "-":
            query_map_idx.append(query_idx)
            target_map_idx.append(target_idx)
            confidence_values.append(float(conf) / 10)
            query_idx += 1
            target_idx += 1
        elif q_char == "-":
            target_idx += 1
        elif t_char == "-":
            query_idx += 1

    return {
        "target_ID": target_ID,
        "query_map_idx": torch.tensor(query_map_idx, device=device),
        "target_map_idx": torch.tensor(target_map_idx, device=device),
        "confidence": torch.tensor(confidence_values, device=device),
    }
def hhpred_parser(hhpred_hhr_file, query_ID, device=torch.device('cpu')):
    """Parse every hit of an HHR result file.

    The file is split into segments at each line that starts with ``"No "``;
    everything before the first such line (the file header) is ignored. Each
    segment is handed to ``parse_segment``.

    Args:
        hhpred_hhr_file: Path of the ``.hhr`` file to read.
        query_ID: Query identifier forwarded to ``parse_segment``.
        device: Torch device forwarded to ``parse_segment``; defaults to CPU.

    Returns:
        A list with one ``parse_segment`` result per hit, in file order.
        The list is empty when the file contains no ``"No "`` line.
    """
    segments = []
    current = None  # stays None while still inside the file header

    with open(hhpred_hhr_file, 'r') as f:
        for line in f:
            if line.startswith('No '):
                if current is not None:
                    segments.append(current)
                current = []
            if current is not None:
                current.append(line)

    # Flush the hit that was still being collected at end of file; without
    # this the final hit (or the only hit) would be silently dropped.
    if current is not None:
        segments.append(current)

    return [parse_segment(segment, query_ID, device) for segment in segments]
if __name__ == "__main__":
    # Manual smoke test: parse an HHR file and discard the result.
    # Both arguments are optional; the defaults reproduce the previously
    # hard-coded invocation.
    import argparse

    cli = argparse.ArgumentParser(description="Parse an HHpred .hhr file.")
    cli.add_argument(
        "hhpred_hhr_file",
        nargs="?",
        default="/home/casp16/run/TS.human/T1207/MiniWorld/templates/hhpred_6231556.hhr",
        help="path of the .hhr file to parse",
    )
    cli.add_argument(
        "query_ID",
        nargs="?",
        default="T1207",
        help="query identifier used in the alignment rows",
    )
    args = cli.parse_args()

    hhpred_parser(args.hhpred_hhr_file, args.query_ID)