Mercurial > repos > abims-sbr > pairwise
annotate scripts/S06_post_processing_of_pairwise.py @ 1:c8af52875b0f draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
| author | lecorguille |
|---|---|
| date | Thu, 13 Apr 2017 09:46:45 -0400 |
| parents | |
| children | 6709645eff5d |
| rev | line source |
|---|---|
|
1
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
2 ## AUTHOR: Eric Fontanillas |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
3 ## LAST VERSION: 14/08/14 by Julie BAFFARD |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
4 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
5 MINIMUM_LENGTH = 1 #bp |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
6 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
7 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
8 ############################ |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
9 ##### DEF1 : Get Pairs ##### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
10 ############################ |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def get_pairs(fasta_file_path):
    """Read a pairwise FASTA file and return its records.

    A record starts at a line beginning with ">" and consists of four
    consecutive lines: query header, query sequence, match header and
    match sequence. Any other line met while looking for the start of a
    record (e.g. a blank separator line) is skipped.

    Args:
        fasta_file_path: path of the FASTA file to read.

    Returns:
        A list of ``[query_name, query_seq, match_name, match_seq]`` lists,
        in file order. Headers keep their leading ">".
    """
    list_pairwises = []

    # "with" guarantees the handle is closed even if reading raises.
    with open(fasta_file_path, "r") as fasta_file:
        while True:
            header = fasta_file.readline()
            if not header:
                break  # end of file
            if not header.startswith(">"):
                continue

            # Strip only the line terminator: slicing with [:-1] would eat
            # the last base of a final line that has no trailing newline,
            # and would leave a stray "\r" on CRLF files.
            fasta_name_query = header.rstrip("\r\n")
            fasta_seq_query = fasta_file.readline().rstrip("\r\n")
            fasta_name_match = fasta_file.readline().rstrip("\r\n")
            fasta_seq_match = fasta_file.readline().rstrip("\r\n")

            list_pairwises.append(
                [fasta_name_query, fasta_seq_query, fasta_name_match, fasta_seq_match]
            )

    return list_pairwises
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
33 ############################################## |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
34 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
35 ################################# |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
36 ##### DEF2 : Extract length ##### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
37 ################################# |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def extract_length(length_string):  # format length string = 57...902
    """Return the difference between the two bounds of a "start...end" string.

    Args:
        length_string: text made of two integers separated by "...",
            e.g. "57...902".

    Returns:
        ``end - start`` as an int (845 for "57...902"). Note this is the plain
        difference, not an inclusive count of positions.

    Raises:
        IndexError: if the "..." separator is missing.
        ValueError: if either bound is not an integer.
    """
    # str.split / int replace string.split / string.atoi, which only exist
    # in the Python 2 "string" module.
    bounds = length_string.split("...")
    start = int(bounds[0])
    end = int(bounds[1])
    return end - start
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
45 ############################################## |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
46 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
47 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
48 #################################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
49 ##### DEF3 : Remove Redundancy ##### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
50 #################################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def filter_redondancy_and_length(list_paireu, MIN_LENGTH):
    """Keep one record per (query, match) pair of names, then filter on length.

    Each header is split on "||": the first field, minus its leading
    character, is the short name, and the second field of the query header
    is parsed by ``extract_length`` to obtain the matched length.

    Args:
        list_paireu: iterable of ``[query_name, query_seq, match_name,
            match_seq]`` records, as returned by ``get_pairs``.
        MIN_LENGTH: threshold; a record is kept only when its matched length
            is strictly greater than this value.

    Returns:
        A list of ``[query_name, query_seq, match_name, match_seq,
        length_matched]`` records, one per distinct short-name pair.
    """
    best_by_binom = {}

    for pair in list_paireu:
        query_name = pair[0]
        query_seq = pair[1]
        match_name = pair[2]
        match_seq = pair[3]

        query_fields = query_name.split("||")
        short_query_name = query_fields[0][1:]
        length_matched = extract_length(query_fields[1])  ### DEF2 ###
        short_match_name = match_name.split("||")[0][1:]
        binom = "%s_%s" % (short_query_name, short_match_name)

        ## TEST FOR REDUNDANCY
        ## Redundancy of the pair: matches between the same 2 contigs, but at
        ## different positions on the contig => only the longest one is kept
        ## (the first one seen wins on ties).
        ## Redundancy NOT removed here:
        ##   1/ Several "TERA" matching one "APN"
        ##      (counted in script "09_formatMatch_getBackNucleotides.py")
        ##   2/ Several "APN" matching one "TERA"
        # Membership is tested on the dict itself: ".keys()" builds a list on
        # every iteration under Python 2, making the loop quadratic.
        if binom not in best_by_binom or length_matched > best_by_binom[binom][-1]:
            best_by_binom[binom] = [query_name, query_seq, match_name, match_seq, length_matched]

    return [record for record in best_by_binom.values() if record[-1] > MIN_LENGTH]
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
88 ############################################## |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
89 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
90 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
91 ####################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
92 ##### RUN RUN RUN ##### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
93 ####################### |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
94 import string, os, time, re, sys |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
95 |
|
c8af52875b0f
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
## 1 ## INPUT/OUTPUT
SHORT_FILE = sys.argv[1]  # short-name-query_short-name-db

F_IN = "%s/06_PairwiseMatch_%s.fasta" % (SHORT_FILE, SHORT_FILE)
F_OUT = "%s/09_PairwiseMatch_filtered_%s.fasta" % (SHORT_FILE, SHORT_FILE)
F_OUT2 = "%s/09_onlyMatch_filtered_%s.fasta" % (SHORT_FILE, SHORT_FILE)

# Both outputs are opened before the input is read (as before), but through
# "with" so they are closed even if reading or filtering raises.
with open(F_OUT, "w") as File_OUT, open(F_OUT2, "w") as File_OUT2:

    ## 2 ## RUN
    list_pairwises = get_pairs(F_IN)  ### DEF1 ###
    list_pairwises_filtered1 = filter_redondancy_and_length(list_pairwises, MINIMUM_LENGTH)  ### DEF3 ###

    for pair in list_pairwises_filtered1:
        ## Write pairwise alignment (query header/sequence, match header/sequence)
        File_OUT.write("%s\n" % pair[0])
        File_OUT.write("%s\n" % pair[1])
        File_OUT.write("%s\n" % pair[2])
        File_OUT.write("%s\n" % pair[3])

        ## Write only "matches" [AND UNGAP THEM: needed before the 2nd run of blast]
        File_OUT2.write("%s\n" % pair[2])
        seq_match = pair[3]
        # str.replace instead of string.replace, which only exists in Python 2.
        seq_match_ungapped = seq_match.replace("-", "")
        File_OUT2.write("%s\n" % seq_match_ungapped)
