Mercurial > repos > abims-sbr > pairwise
view scripts/S09_post_processing_of_pairwise.py @ 1:c8af52875b0f draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
| author | lecorguille |
|---|---|
| date | Thu, 13 Apr 2017 09:46:45 -0400 |
| parents | |
| children | 6709645eff5d |
line wrap: on
line source
#!/usr/bin/env python ## AUTHOR: Eric Fontanillas ## LAST VERSION: 14/08/14 by Julie BAFFARD MINIMUM_LENGTH = 1 ############################ ##### DEF1 : Get Pairs ##### ############################ def get_pairs(fasta_file_path): F2 = open(fasta_file_path, "r") list_pairwises = [] while 1: next2 = F2.readline() if not next2: break if next2[0] == ">": fasta_name_query = next2[:-1] next3 = F2.readline() fasta_seq_query = next3[:-1] next3 = F2.readline() ## jump one empty line (if any after the sequence) fasta_name_match = next3[:-1] next3 = F2.readline() fasta_seq_match = next3[:-1] pairwise = [fasta_name_query,fasta_seq_query,fasta_name_match,fasta_seq_match] ## ADD pairwise with condition list_pairwises.append(pairwise) F2.close() return(list_pairwises) ############################################## ################################# ##### DEF2 : Extract length ##### ################################# def extract_length(length_string): # format length string = 57...902 l3 = string.split(length_string, "...") n1 = string.atoi(l3[0]) n2 = string.atoi(l3[1]) length = n2-n1 return(length) ############################################## #################################### ##### DEF3 : Remove Redondancy ##### #################################### def filter_redondancy(list_paireu, MIN_LENGTH): bash1 = {} list_pairout = [] for pair in list_paireu: query_name = pair[0] query_seq = pair[1] match_name = pair[2] match_seq = pair[3] l1 = string.split(query_name, "||") short_query_name = l1[0][1:] length_matched = extract_length(l1[1]) ### DEF2 ### l2 = string.split(match_name, "||") short_match_name = l2[0][1:] binom = "%s_%s" %(short_query_name, short_match_name) if binom not in bash1.keys(): bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched] else: old_length = bash1[binom][-1] if length_matched > old_length: bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched] for bino in bash1.keys(): length = bash1[bino][-1] if length > MIN_LENGTH: list_pairout.append(bash1[bino]) return(list_pairout) ############################################## ####################### ##### RUN RUN RUN ##### ####################### import string, os, time, re, sys ## 1 ## INPUT/OUTPUT SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db F_IN = "%s/13_PairwiseMatch_%s.fasta" %(SHORT_FILE, SHORT_FILE) F_OUT = "%s/15_PairwiseMatch_filtered_%s.fasta" %(SHORT_FILE, SHORT_FILE) File_OUT = open(F_OUT, "w") ## 2 ## RUN list_pairwises = get_pairs(F_IN) ### DEF1 ### list_pairwises_filtered1 = filter_redondancy(list_pairwises, MINIMUM_LENGTH) ### DEF3 ### i = 0 for pair in list_pairwises_filtered1: i = i+1 ## Write pairwise alignment File_OUT.write("%s\n" %pair[0]) File_OUT.write("%s\n" %pair[1]) File_OUT.write("%s\n" %pair[2]) File_OUT.write("%s\n" %pair[3]) File_OUT.close()
