Mercurial > repos > abims-sbr > pairwise
comparison scripts/S09_post_processing_of_pairwise.py @ 1:c8af52875b0f draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
| author | lecorguille |
|---|---|
| date | Thu, 13 Apr 2017 09:46:45 -0400 |
| parents | |
| children | 6709645eff5d |
comparison
equal
deleted
inserted
replaced
| 0:e95d4b20c62d | 1:c8af52875b0f |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 ## AUTHOR: Eric Fontanillas | |
| 3 ## LAST VERSION: 14/08/14 by Julie BAFFARD | |
| 4 | |
| 5 MINIMUM_LENGTH = 1 | |
| 6 | |
| 7 ############################ | |
| 8 ##### DEF1 : Get Pairs ##### | |
| 9 ############################ | |
def get_pairs(fasta_file_path):
    """Read query/match sequence pairs from a pairwise FASTA file.

    A record is four consecutive lines: a query header starting with ">",
    the query sequence, the match header and the match sequence.  Lines
    found between records that do not start with ">" (for example blank
    separator lines) are skipped.

    Args:
        fasta_file_path: path of the FASTA file to read.

    Returns:
        A list of ``[query_name, query_seq, match_name, match_seq]`` lists,
        in file order, with line terminators removed.  If the file ends in
        the middle of a record, the missing fields are empty strings.
    """
    list_pairwises = []
    # "with" guarantees the handle is closed even if reading fails.
    with open(fasta_file_path, "r") as fasta_file:
        while True:
            line = fasta_file.readline()
            if not line:
                break
            if not line.startswith(">"):
                continue
            # Strip the terminator with rstrip rather than slicing off the
            # last character: a final line without a trailing newline would
            # otherwise lose a real character.
            record = [line.rstrip("\r\n")]
            for _ in range(3):
                record.append(fasta_file.readline().rstrip("\r\n"))
            list_pairwises.append(record)
    return list_pairwises
| 31 ############################################## | |
| 32 | |
| 33 | |
| 34 ################################# | |
| 35 ##### DEF2 : Extract length ##### | |
| 36 ################################# | |
def extract_length(length_string):  # format length string = 57...902
    """Return the span described by a "start...end" coordinate string.

    Args:
        length_string: two integers separated by "...", e.g. "57...902".

    Returns:
        ``end - start`` as an int (845 for "57...902").

    Raises:
        ValueError: if a coordinate is not a valid integer.
        IndexError: if the "..." separator is missing.
    """
    # str.split / int() replace string.split / string.atoi, which exist only
    # in Python 2; the str methods behave the same on both major versions.
    bounds = length_string.split("...")
    start = int(bounds[0])
    end = int(bounds[1])
    return end - start
| 43 ############################################## | |
| 44 | |
| 45 | |
| 46 #################################### | |
| 47 ##### DEF3 : Remove Redundancy ##### | |
| 48 #################################### | |
def filter_redondancy(list_paireu, MIN_LENGTH):
    """Keep only the longest alignment for each query/match name pair.

    Headers are split on "||": the first field (minus its leading ">") is
    the short sequence name, and the second field of the query header is a
    "start...end" string handed to extract_length().

    Args:
        list_paireu: list of ``[query_name, query_seq, match_name,
            match_seq]`` entries.
        MIN_LENGTH: entries whose matched length is not strictly greater
            than this value are dropped from the result.

    Returns:
        A list of ``[query_name, query_seq, match_name, match_seq,
        length_matched]`` entries, one per distinct query/match name pair.
        On a tie in length the first entry seen is kept.
    """
    best_by_binom = {}

    for pair in list_paireu:
        query_name = pair[0]
        query_seq = pair[1]
        match_name = pair[2]
        match_seq = pair[3]

        # str.split replaces the Python 2-only string.split.
        query_fields = query_name.split("||")
        short_query_name = query_fields[0][1:]
        length_matched = extract_length(query_fields[1])  ### DEF2 ###
        short_match_name = match_name.split("||")[0][1:]
        binom = "%s_%s" % (short_query_name, short_match_name)

        # One dict lookup instead of scanning .keys() (a list on Python 2),
        # which made the whole loop quadratic.
        previous = best_by_binom.get(binom)
        if previous is None or length_matched > previous[-1]:
            best_by_binom[binom] = [query_name, query_seq, match_name,
                                    match_seq, length_matched]

    return [entry for entry in best_by_binom.values()
            if entry[-1] > MIN_LENGTH]
| 82 ############################################## | |
| 83 | |
| 84 | |
| 85 ####################### | |
| 86 ##### RUN RUN RUN ##### | |
| 87 ####################### | |
| 88 import string, os, time, re, sys | |
| 89 | |
## 1 ## INPUT/OUTPUT
SHORT_FILE = sys.argv[1]  ## short-name-query_short-name-db

# Both files live in a directory named after SHORT_FILE.
F_IN = "%s/13_PairwiseMatch_%s.fasta" % (SHORT_FILE, SHORT_FILE)
F_OUT = "%s/15_PairwiseMatch_filtered_%s.fasta" % (SHORT_FILE, SHORT_FILE)

## 2 ## RUN
list_pairwises = get_pairs(F_IN)  ### DEF1 ###

list_pairwises_filtered1 = filter_redondancy(list_pairwises, MINIMUM_LENGTH)  ### DEF3 ###

## 3 ## WRITE
# The output file is opened only after the input has been read and filtered,
# so a missing or malformed input no longer leaves an empty output file
# behind; "with" closes the handle even if a write fails.
with open(F_OUT, "w") as File_OUT:
    for pair in list_pairwises_filtered1:
        ## Write pairwise alignment: query name, query sequence, match name,
        ## match sequence. Any element after the fourth is not written.
        for field in pair[:4]:
            File_OUT.write("%s\n" % field)
