Mercurial > repos > abims-sbr > pairwise
diff scripts/S11_post_processing_of_pairwise.py @ 4:6709645eff5d draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
| author | abims-sbr |
|---|---|
| date | Wed, 17 Jan 2018 08:53:53 -0500 |
| parents | c8af52875b0f |
| children |
line wrap: on
line diff
--- a/scripts/S11_post_processing_of_pairwise.py Wed Sep 27 10:01:55 2017 -0400
+++ b/scripts/S11_post_processing_of_pairwise.py Wed Jan 17 08:53:53 2018 -0500
@@ -4,88 +4,13 @@
 MINIMUM_LENGTH = 1
-############################
-##### DEF1 : Get Pairs #####
-############################
-def get_pairs(fasta_file_path):
-    F2 = open(fasta_file_path, "r")
-    list_pairwises = []
-    while 1:
-        next2 = F2.readline()
-        if not next2:
-            break
-        if next2[0] == ">":
-            fasta_name_query = next2[:-1]
-            next3 = F2.readline()
-            fasta_seq_query = next3[:-1]
-            next3 = F2.readline() ## jump one empty line (if any after the sequence)
-            fasta_name_match = next3[:-1]
-            next3 = F2.readline()
-            fasta_seq_match = next3[:-1]
-            pairwise = [fasta_name_query,fasta_seq_query,fasta_name_match,fasta_seq_match]
-
-            ## ADD pairwise with condition
-            list_pairwises.append(pairwise)
-    F2.close()
-    return(list_pairwises)
-##############################################
-
-
-#################################
-##### DEF2 : Extract length #####
-#################################
-def extract_length(length_string): # format length string = 57...902
-    l3 = string.split(length_string, "...")
-    n1 = string.atoi(l3[0])
-    n2 = string.atoi(l3[1])
-    length = n2-n1
-    return(length)
-##############################################
-
-
-####################################
-##### DEF3 : Remove Redondancy #####
-####################################
-def filter_redondancy(list_paireu, MIN_LENGTH):
-
-    bash1 = {}
-    list_pairout = []
-
-    for pair in list_paireu:
-        query_name = pair[0]
-        query_seq = pair[1]
-        match_name = pair[2]
-        match_seq = pair[3]
-
-        l1 = string.split(query_name, "||")
-        short_query_name = l1[0][1:]
-        length_matched = extract_length(l1[1]) ### DEF2 ###
-        l2 = string.split(match_name, "||")
-        short_match_name = l2[0][1:]
-        binom = "%s_%s" %(short_query_name, short_match_name)
-
-        if binom not in bash1.keys():
-            bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched]
-        else:
-            old_length = bash1[binom][-1]
-            if length_matched > old_length:
-                bash1[binom] = [query_name, query_seq, match_name, match_seq, length_matched]
-
-
-    for bino in bash1.keys():
-        length = bash1[bino][-1]
-        if length > MIN_LENGTH:
-            list_pairout.append(bash1[bino])
-
-    return(list_pairout)
-##############################################
-
-
 #######################
 ##### RUN RUN RUN #####
 #######################
 import string, os, time, re, sys
+from functions import get_pairs, extract_length, filter_redondancy
+## 1 ## INPUT/OUTPUT
 SHORT_FILE = sys.argv[1] ## short-name-query_short-name-db
 F_IN = "%s/17_ReciprocalHits_%s.fasta" %(SHORT_FILE, SHORT_FILE)
