comparison scripts/S09_post_processing_of_pairwise.py @ 1:c8af52875b0f draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
author lecorguille
date Thu, 13 Apr 2017 09:46:45 -0400
parents
children 6709645eff5d
comparison
equal deleted inserted replaced
0:e95d4b20c62d 1:c8af52875b0f
1 #!/usr/bin/env python
2 ## AUTHOR: Eric Fontanillas
3 ## LAST VERSION: 14/08/14 by Julie BAFFARD
4
5 MINIMUM_LENGTH = 1
6
7 ############################
8 ##### DEF1 : Get Pairs #####
9 ############################
def get_pairs(fasta_file_path):
    """Read query/match sequence pairs from a pairwise FASTA file.

    A record is four consecutive lines: a query header (starting with ">"),
    the query sequence, the match header and the match sequence. While a new
    record is being looked for, any line that does not start with ">" is
    skipped.

    Parameters:
        fasta_file_path: path of the FASTA file to read.

    Returns:
        A list of [query_name, query_seq, match_name, match_seq] lists, in
        file order, each string without its trailing newline.
    """
    list_pairwises = []
    # The context manager closes the file even when reading raises.
    with open(fasta_file_path, "r") as fasta_file:
        while True:
            header = fasta_file.readline()
            if not header:
                break
            if not header.startswith(">"):
                continue
            # The three lines after a query header are taken as-is: query
            # sequence, match header, match sequence (no blank line is
            # skipped in between).
            record = [header] + [fasta_file.readline() for _ in range(3)]
            # Strip only the newline, so that a final line lacking one keeps
            # its last character.
            list_pairwises.append([line.rstrip("\n") for line in record])
    return list_pairwises
31 ##############################################
32
33
34 #################################
35 ##### DEF2 : Extract length #####
36 #################################
def extract_length(length_string):  # format length string = 57...902
    """Return the span described by a "start...end" coordinate string.

    Parameters:
        length_string: two integers separated by "...", e.g. "57...902".

    Returns:
        end - start as an integer (the plain difference, not an inclusive
        position count).

    Raises:
        ValueError: if a bound is not an integer.
        IndexError: if the "..." separator is missing.
    """
    # str.split / int replace string.split / string.atoi, which no longer
    # exist in Python 3.
    bounds = length_string.split("...")
    start = int(bounds[0])
    end = int(bounds[1])
    return end - start
43 ##############################################
44
45
46 ####################################
47 ##### DEF3 : Remove Redundancy #####
48 ####################################
def filter_redondancy(list_paireu, MIN_LENGTH):
    """Keep the longest alignment for each (query, match) name pair.

    Headers are split on "||". The first field, minus its leading character,
    is the short sequence name. The second field of the query header is a
    "start...end" string whose span is computed with extract_length().

    Parameters:
        list_paireu: list of [query_name, query_seq, match_name, match_seq].
        MIN_LENGTH: entries are kept only if their span is strictly greater
            than this value.

    Returns:
        A list of [query_name, query_seq, match_name, match_seq, length]
        lists, one per distinct short-name pair. When several alignments
        share a pair, the first one with the greatest span is kept.
    """
    best_by_pair = {}

    for pair in list_paireu:
        query_name, query_seq = pair[0], pair[1]
        match_name, match_seq = pair[2], pair[3]

        query_fields = query_name.split("||")
        short_query_name = query_fields[0][1:]
        length_matched = extract_length(query_fields[1])  ### DEF2 ###
        short_match_name = match_name.split("||")[0][1:]
        binom = "%s_%s" % (short_query_name, short_match_name)

        # A single dict lookup replaces the membership test on .keys(), which
        # built a fresh list on every iteration under Python 2.
        previous = best_by_pair.get(binom)
        if previous is None or length_matched > previous[-1]:
            best_by_pair[binom] = [query_name, query_seq, match_name, match_seq, length_matched]

    return [entry for entry in best_by_pair.values() if entry[-1] > MIN_LENGTH]
82 ##############################################
83
84
85 #######################
86 ##### RUN RUN RUN #####
87 #######################
88 import string, os, time, re, sys
89
## 1 ## INPUT/OUTPUT
SHORT_FILE = sys.argv[1]  ## short-name-query_short-name-db

# Input and output both live in the directory named after SHORT_FILE.
F_IN = "%s/13_PairwiseMatch_%s.fasta" % (SHORT_FILE, SHORT_FILE)
F_OUT = "%s/15_PairwiseMatch_filtered_%s.fasta" % (SHORT_FILE, SHORT_FILE)

## 2 ## RUN
list_pairwises = get_pairs(F_IN)  ### DEF1 ###

list_pairwises_filtered1 = filter_redondancy(list_pairwises, MINIMUM_LENGTH)  ### DEF3 ###

## 3 ## WRITE
# The output is opened only after the input has been read and filtered, so a
# failed run does not leave an empty result file behind. The context manager
# closes it even if a write raises.
with open(F_OUT, "w") as File_OUT:
    for pair in list_pairwises_filtered1:
        # Write pairwise alignment: the four name/sequence fields, one per
        # line. The fifth element (the span) is not written.
        for field in pair[:4]:
            File_OUT.write("%s\n" % field)