Mercurial > repos > abims-sbr > cds_search
annotate scripts/S01_find_orf_on_multiple_alignment.py @ 9:640ef4c06ed5 draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
| author | abims-sbr |
|---|---|
| date | Tue, 03 Jul 2018 10:54:18 -0400 |
| parents | 716a45028e55 |
| children | 3d00be2d05f3 |
| rev | line source |
|---|---|
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
1 #!/usr/bin/python |
|
8
716a45028e55
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 90cfcf697b9f128e81bea1270378e59d63ab0a6f
abims-sbr
parents:
7
diff
changeset
|
2 # coding: utf8 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
3 ## Author: Eric Fontanillas |
|
8
716a45028e55
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 90cfcf697b9f128e81bea1270378e59d63ab0a6f
abims-sbr
parents:
7
diff
changeset
|
4 ## Modification: 03/09/14 by Julie BAFFARD |
|
716a45028e55
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 90cfcf697b9f128e81bea1270378e59d63ab0a6f
abims-sbr
parents:
7
diff
changeset
|
5 ## Last modification : 05/03/18 by Victor Mataigne |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
6 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
7 ## Description: Predict potential ORF on the basis of 2 criteria + 1 optional criterion |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
8 ## CRITERIA 1 ## Longest part of the alignment of sequence without codon stop "*", tested in the 3 potential ORF |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
9 ## CRITERIA 2 ## This longest part should be > 150nc or 50aa |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
10 ## CRITERIA 3 ## [OPTIONAL] A start codon "M" should be present in this longest part, before the last 50 aa |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
11 ## OUTPUTs "05_CDS_aa" & "05_CDS_nuc" => NOT INCLUDE THIS CRITERIA |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
12 ## OUTPUTs "06_CDS_with_M_aa" & "06_CDS_with_M_nuc" => INCLUDE THIS CRITERIA |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
13 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
14 #################################################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
15 ###### DEF 2 : Create bash for genetic code ######## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
16 #################################################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
17 ### KEY = codon |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
18 ### VALUE = Amino Acid |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
19 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def code_universel(F1):
    """Load a codon -> amino-acid lookup table from the text file F1.

    Each line is split on single spaces: the first field is used as the
    key (codon) and the third field as the value (amino acid).
    NOTE(review): presumably lines look like "UUU = F" -- confirm against
    the genetic-code file shipped with the tool.

    Lines with fewer than three space-separated fields (e.g. blank lines)
    are skipped instead of raising IndexError.

    Returns a dict mapping codon -> amino acid.
    """
    bash_codeUniversel = {}
    with open(F1, "r") as handle:
        for line in handle:
            fields = line.split(" ")
            if len(fields) < 3:
                # Nothing usable on this line (blank / malformed).
                continue
            value = fields[2]
            if len(fields) == 3:
                # The third field is the last one on the line, so it carries
                # the line terminator; strip only that (not a blind [:-1],
                # which would eat the amino acid on an unterminated last line).
                value = value.rstrip("\r\n")
            bash_codeUniversel[fields[0]] = value
    return bash_codeUniversel
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
35 ########################################################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
36 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
37 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
38 ###################################################################################################################### |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
39 ##### DEF 3 : Test if the sequence is a multiple of 3, and if not correct the sequence to become a multiple of 3 ##### |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
40 ###################################################################################################################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
41 ### WEAKNESS OF THAT APPROACH = I remove extra base(s) at the end of the sequence ==> I can lose a codon when I test the ORFs (as I will shift the reading frame) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def multiple3(seq):
    """Trim seq so that its length is a multiple of 3.

    The 0, 1 or 2 surplus elements are removed from the END of the
    sequence, so a trailing partial codon is dropped.

    Returns a tuple (new_seq, modulo) where modulo is len(seq) % 3, i.e.
    the number of trailing elements that were removed.
    """
    modulo = len(seq) % 3
    # Keep everything except the `modulo` trailing elements; slicing with an
    # explicit end index also covers modulo == 0 (whole sequence kept).
    new_seq = seq[:len(seq) - modulo]
    return (new_seq, modulo)
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
53 ########################################################## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
54 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
55 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
56 ############################# |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
57 ###### DEF 4 : GET ORF ###### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
58 ############################# |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
59 ##- MULTIPLE SEQUENCE BASED : Based on ALIGNMENT of several sequences |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
60 ##- CRITERIA1: Get the segment in the alignment with no codon stop |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
61 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
62 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
63 ###### DEF 4 - Part 1 - ###### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
64 ############################## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def simply_get_ORF(seq_dna, bash_codeUniversel):
    """Translate a nucleotide string into an amino-acid string.

    seq_dna is read codon by codon (3 characters at a time) from its first
    character. Each codon is upper-cased and its "T" replaced by "U" before
    being looked up in bash_codeUniversel (dict codon -> amino acid), so the
    table keys must be upper-case RNA codons for a lookup to succeed.

    Codons absent from the table (e.g. containing a gap "-" or "N") are
    translated as "?". A trailing incomplete codon (length not a multiple
    of 3) is ignored instead of raising IndexError.

    Returns the translated sequence as a string.
    """
    rna = seq_dna.upper().replace("T", "U")
    # Only walk over complete codons.
    usable = len(rna) - len(rna) % 3
    return "".join(
        bash_codeUniversel.get(rna[i:i + 3], "?")
        for i in range(0, usable, 3)
    )
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
88 ########################################################## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
89 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
90 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
91 ###### DEF 4 - Part 2 - ###### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
92 ############################## |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
93 def find_good_ORF_criteria_3(bash_aligned_nc_seq, bash_codeUniversel): |
|
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
94 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
95 ## 1 ## Get the list of aligned aa seq for the 3 ORF: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
96 bash_of_aligned_aa_seq_3ORF = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
97 bash_of_aligned_nuc_seq_3ORF = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
98 BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION = [] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
99 for fasta_name in bash_aligned_nc_seq.keys(): |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
100 ## 1.1. ## Get the raw sequence |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
101 sequence_nc = bash_aligned_nc_seq[fasta_name] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
102 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
103 ## 1.2. ## Check whether the sequence is multiple of 3, and correct it if not: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
104 new_sequence_nc, modulo = multiple3(sequence_nc) ### DEF 3 ### |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
105 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
106 ## 1.3. ## Get the 3 ORFs (nuc) for each sequence |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
107 seq_nuc_ORF1 = new_sequence_nc |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
108 seq_nuc_ORF2 = new_sequence_nc[1:-2] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
109 seq_nuc_ORF3 = new_sequence_nc[2:-1] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
110 seq_reversed=ReverseComplement2(seq_nuc_ORF1) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
111 seq_nuc_ORF4=seq_reversed |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
112 seq_nuc_ORF5=seq_reversed[1:-2] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
113 seq_nuc_ORF6=seq_reversed[2:-1] |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
114 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
115 LIST_6_ORF_nuc = [seq_nuc_ORF1, seq_nuc_ORF2, seq_nuc_ORF3,seq_nuc_ORF4,seq_nuc_ORF5,seq_nuc_ORF6] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
116 bash_of_aligned_nuc_seq_3ORF[fasta_name] = LIST_6_ORF_nuc ### For each seq of the multialignment => give the 6 ORFs (in nuc) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
117 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
118 ## 1.4. ## Get the 3 ORFs (aa) for each sequence |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
119 seq_prot_ORF1 = simply_get_ORF(seq_nuc_ORF1,bash_codeUniversel) ### DEF 4 - Part 1 - ## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
120 seq_prot_ORF2 = simply_get_ORF(seq_nuc_ORF2,bash_codeUniversel) ### DEF 4 - Part 1 - ## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
121 seq_prot_ORF3 = simply_get_ORF(seq_nuc_ORF3,bash_codeUniversel) ### DEF 4 - Part 1 - ## |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
122 seq_prot_ORF4 = simply_get_ORF(seq_nuc_ORF4,bash_codeUniversel) ### DEF 4 - Part 1 - ## |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
123 seq_prot_ORF5 = simply_get_ORF(seq_nuc_ORF5,bash_codeUniversel) ### DEF 4 - Part 1 - ## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
124 seq_prot_ORF6 = simply_get_ORF(seq_nuc_ORF6,bash_codeUniversel) ### DEF 4 - Part 1 - ## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
125 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
126 LIST_6_ORF_aa = [seq_prot_ORF1, seq_prot_ORF2, seq_prot_ORF3,seq_prot_ORF4,seq_prot_ORF5,seq_prot_ORF6] |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
127 bash_of_aligned_aa_seq_3ORF[fasta_name] = LIST_6_ORF_aa ### For each seq of the multialignment => give the 6 ORFs (in aa) |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
128 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
129 ## 2 ## Test for the best ORF (Get the longuest segment in the alignment with no codon stop ... for each ORF ... the longuest should give the ORF) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
130 BEST_MAX = 0 |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
131 for i in [0,1,2,3,4,5]: ### Test the 6 ORFs |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
132 ORF_Aligned_aa = [] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
133 ORF_Aligned_nuc = [] |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
134 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
135 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
136 ## 2.1 ## Get the alignment of sequence for a given ORF |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
137 ## Compare the 1rst ORF between all sequence => list them in ORF_Aligned_aa // them do the same for the second ORF, and them the 3rd |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
138 for fasta_name in bash_of_aligned_aa_seq_3ORF.keys(): |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
139 ORFsequence = bash_of_aligned_aa_seq_3ORF[fasta_name][i] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
140 aa_length = len(ORFsequence) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
141 ORF_Aligned_aa.append(ORFsequence) ### List of all sequences in the ORF nb "i" = |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
142 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
143 n = i+1 |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
144 |
|
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
145 for fasta_name in bash_of_aligned_nuc_seq_3ORF.keys(): |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
146 ORFsequence = bash_of_aligned_nuc_seq_3ORF[fasta_name][i] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
147 nuc_length = len(ORFsequence) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
148 ORF_Aligned_nuc.append(ORFsequence) ### List of all sequences in the ORF nb "i" = |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
149 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
150 ## 2.2 ## Get the list of sublist of positions whithout codon stop in the alignment |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
151 ## For each ORF, now we have the list of sequences available (i.e. THE ALIGNMENT IN A GIVEN ORF) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
152 ## Next step is to get the longest subsequence without stop |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
153 ## We will explore the presence of stop "*" in each column of the alignment, and get the positions of the segments between the positions with "*" |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
154 MAX_LENGTH = 0 |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
155 LONGUEST_SEGMENT_UNSTOPPED = "" |
|
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
156 j = 0 # Start from first position in alignment |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
157 List_of_List_subsequences = [] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
158 List_positions_subsequence = [] |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
159 while j < aa_length: |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
160 column = [] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
161 for seq in ORF_Aligned_aa: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
162 column.append(seq[j]) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
163 j = j+1 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
164 if "*" in column: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
165 List_of_List_subsequences.append(List_positions_subsequence) ## Add previous list of positions |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
166 List_positions_subsequence = [] ## Re-initialyse list of positions |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
167 else: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
168 List_positions_subsequence.append(j) |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
169 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
170 ## 2.3 ## Among all the sublists (separated by column with codon stop "*"), get the longuest one (BETTER SEGMENT for a given ORF) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
171 LONGUEST_SUBSEQUENCE_LIST_POSITION = [] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
172 MAX=0 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
173 for sublist in List_of_List_subsequences: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
174 if len(sublist) > MAX and len(sublist) > MINIMAL_CDS_LENGTH: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
175 MAX = len(sublist) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
176 LONGUEST_SUBSEQUENCE_LIST_POSITION = sublist |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
177 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
178 ## 2.4. ## Test if the longuest subsequence start exactly at the beginning of the original sequence (i.e. means the ORF maybe truncated) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
179 if LONGUEST_SUBSEQUENCE_LIST_POSITION != []: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
180 if LONGUEST_SUBSEQUENCE_LIST_POSITION[0] == 0: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
181 CDS_maybe_truncated = 1 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
182 else: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
183 CDS_maybe_truncated = 0 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
184 else: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
185 CDS_maybe_truncated = 0 |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
186 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
187 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
188 ## 2.5 ## Test if this BETTER SEGMENT for a given ORF, is the better than the one for the other ORF (GET THE BEST ORF) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
189 ## Test whether it is the better ORF |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
190 if MAX > BEST_MAX: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
191 BEST_MAX = MAX |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
192 BEST_ORF = i+1 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
193 BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION = LONGUEST_SUBSEQUENCE_LIST_POSITION |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
194 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
195 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
196 ## 3 ## ONCE we have this better segment (BEST CODING SEGMENT) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
197 ## ==> GET THE STARTING and ENDING POSITIONS (in aa position and in nuc position) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
198 ## And get the INDEX of the best ORF [0, 1, or 2] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
199 if BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION != []: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
200 pos_MIN_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[0] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
201 pos_MIN_aa = pos_MIN_aa - 1 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
202 pos_MAX_aa = BEST_LONGUEST_SUBSEQUENCE_LIST_POSITION[-1] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
203 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
204 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
205 BESTORF_bash_of_aligned_aa_seq = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
206 BESTORF_bash_of_aligned_aa_seq_CODING = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
207 for fasta_name in bash_of_aligned_aa_seq_3ORF.keys(): |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
208 index_BEST_ORF = BEST_ORF-1 ### cause list going from 0 to 2 in LIST_3_ORF, while the ORF nb is indexed from 1 to 3 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
209 seq = bash_of_aligned_aa_seq_3ORF[fasta_name][index_BEST_ORF] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
210 seq_coding = seq[pos_MIN_aa:pos_MAX_aa] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
211 BESTORF_bash_of_aligned_aa_seq[fasta_name] = seq |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
212 BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] = seq_coding |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
213 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
214 ## 4 ## Get the corresponding position (START/END of BEST CODING SEGMENT) for nucleotides alignment |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
215 pos_MIN_nuc = pos_MIN_aa * 3 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
216 pos_MAX_nuc = pos_MAX_aa * 3 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
217 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
218 BESTORF_bash_aligned_nc_seq = {} |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
219 BESTORF_bash_aligned_nc_seq_CODING = {} |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
220 for fasta_name in bash_aligned_nc_seq.keys(): |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
221 seq = bash_of_aligned_nuc_seq_3ORF[fasta_name][index_BEST_ORF] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
222 seq_coding = seq[pos_MIN_nuc:pos_MAX_nuc] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
223 BESTORF_bash_aligned_nc_seq[fasta_name] = seq |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
224 BESTORF_bash_aligned_nc_seq_CODING[fasta_name] = seq_coding |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
225 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
226 else: ### no CDS found ### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
227 BESTORF_bash_aligned_nc_seq = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
228 BESTORF_bash_aligned_nc_seq_CODING = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
229 BESTORF_bash_of_aligned_aa_seq = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
230 BESTORF_bash_of_aligned_aa_seq_CODING ={} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
231 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
232 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
233 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
234 ### Check whether there is an "M" or not, and if at least 1 "M" is present, that it is not in the last 50 aa |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
235 ########################################################################################################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
236 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
237 BESTORF_bash_of_aligned_aa_seq_CDS_with_M = {} |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
238 BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = {} |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
239 |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
240 Ortho = 0 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
241 for fasta_name in BESTORF_bash_of_aligned_aa_seq_CODING.keys(): |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
242 seq_aa = BESTORF_bash_of_aligned_aa_seq_CODING[fasta_name] |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
243 Ortho = detect_Methionine(seq_aa, Ortho) ### DEF6 ### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
244 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
245 ## CASE 1: A "M" is present and correctly localized (not in last 50 aa) |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
246 if Ortho == 1: |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
247 BESTORF_bash_of_aligned_aa_seq_CDS_with_M = BESTORF_bash_of_aligned_aa_seq_CODING |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
248 BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
249 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
250 ## CASE 2: in case the CDS is truncated, so the "M" is maybe missing: |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
251 if Ortho == 0 and CDS_maybe_truncated == 1: |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
252 BESTORF_bash_of_aligned_aa_seq_CDS_with_M = BESTORF_bash_of_aligned_aa_seq_CODING |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
253 BESTORF_bash_of_aligned_nuc_seq_CDS_with_M = BESTORF_bash_aligned_nc_seq_CODING |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
254 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
255 ## CASE 3: CDS not truncated AND no "M" found in good position (i.e. before the last 50 aa): |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
256 ## => the 2 bash "CDS_with_M" are left empty ("{}") |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
257 |
|
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
258 return(BESTORF_bash_aligned_nc_seq, BESTORF_bash_aligned_nc_seq_CODING, BESTORF_bash_of_aligned_nuc_seq_CDS_with_M, BESTORF_bash_of_aligned_aa_seq, BESTORF_bash_of_aligned_aa_seq_CODING, BESTORF_bash_of_aligned_aa_seq_CDS_with_M) |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
259 ########################################################## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
260 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
261 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
262 ################################################################################################## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
263 ###### DEF 5 : Detect all indices corresponding to all occurrences of a substring in a string ###### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
264 ################################################################################################## |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def allindices(string, sub):
    """Return every index at which `sub` starts inside `string`.

    The search resumes one character after each hit, so overlapping
    occurrences are all reported (e.g. "aa" in "aaa" gives [0, 1]).
    An empty list is returned when `sub` never occurs.
    """
    positions = []
    hit = string.find(sub)
    # str.find() answers -1 once there is no further occurrence.
    while hit != -1:
        positions.append(hit)
        hit = string.find(sub, hit + 1)
    return positions
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
273 ###################################################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
274 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
275 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
276 ############################################################ |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
277 ###### DEF 6 : Detect if a methionine is present in the aa sequence ###### |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
278 ############################################################ |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def detect_Methionine(seq_aa, Ortho):
    """Update the `Ortho` flag according to the position of the first "M".

    The first "M" of `seq_aa` is located; when its index is strictly lower
    than ``len(seq_aa) - MINIMAL_CDS_LENGTH`` the flag becomes 1.  In every
    other case (no "M", or first "M" at/after that cutoff) the incoming flag
    is returned untouched, so a 1 obtained from an earlier call is never
    reset to 0.

    Args:
        seq_aa: amino-acid sequence (string) scanned for "M".
        Ortho: current flag value supplied by the caller.

    Returns:
        1 if a suitably placed "M" was found, otherwise `Ortho` as received.
    """
    # The threshold is the global MINIMAL_CDS_LENGTH (expected to be set at
    # module level before this is called).  sys.argv is deliberately not read
    # here: the value was never used and indexing argv made the function fail
    # with IndexError when fewer command-line arguments were present.
    CUTOFF_Last_50aa = len(seq_aa) - MINIMAL_CDS_LENGTH

    ## Find all indices of occurrences of "M" in the string of aa
    list_indices = allindices(seq_aa, "M")  ### DEF5 ###

    ## Only the first "M" is tested: it must lie before the cutoff,
    ## otherwise the segment downstream of it is considered too short.
    if list_indices and list_indices[0] < CUTOFF_Last_50aa:
        Ortho = 1  ## means orthologs found

    return Ortho
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
296 ################################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
297 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
298 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
299 ############################################################ |
|
2
0d2f72caea10
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
abims-sbr
parents:
1
diff
changeset
|
300 ###### DEF 7 : Reverse complement DNA sequence ###### |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
301 ###### Reference: http://crazyhottommy.blogspot.fr/2013/10/python-code-for-getting-reverse.html |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
302 ############################################################ |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def ReverseComplement2(seq):
    """Return the reverse complement of a DNA sequence.

    Upper- and lower-case A/T/C/G are complemented with their case
    preserved; the unknown base (N/n) and the alignment gap ('-') are
    their own complement.

    seq -- nucleotide string (may be empty).

    Returns the complemented sequence, read in the opposite direction.
    Raises KeyError if seq contains any other character (for instance an
    IUPAC ambiguity code), since no complement is defined for it here.
    """
    # Each symbol in `bases` pairs with the symbol at the same index in
    # `complements`.  Lowercase 'n' is listed explicitly: leaving it out of
    # the table makes any sequence containing it fail with KeyError.
    bases = 'ATCGN-atcgn'
    complements = 'TAGCN-tagcn'
    seq_dict = dict(zip(bases, complements))
    return "".join([seq_dict[base] for base in reversed(seq)])
|
8
716a45028e55
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 90cfcf697b9f128e81bea1270378e59d63ab0a6f
abims-sbr
parents:
7
diff
changeset
|
308 ############################ |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
309 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
310 |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
311 ####################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
312 ##### RUN RUN RUN ##### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
313 ####################### |
|
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
314 import string, os, time, re, zipfile, sys |
|
3
ff98ed7849fa
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
abims-sbr
parents:
2
diff
changeset
|
315 from dico import dico |
|
1
567d5b771a90
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
316 |
|
9
640ef4c06ed5
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
abims-sbr
parents:
8
diff
changeset
|
# Command-line arguments read by this section:
#   sys.argv[1] -- passed to code_universel() to load the genetic code
#   sys.argv[2] -- minimal CDS length, in number of amino acids
#   sys.argv[3] -- text file listing one alignment (fasta) file name per line
MINIMAL_CDS_LENGTH = int(sys.argv[2])  ## in aa number

### Get Universal Code
bash_codeUniversel = code_universel(sys.argv[1])  ### DEF2 ###

## INPUT from file containing the list of alignment files
list_files = []
with open(sys.argv[3], 'r') as f:
    for line in f:
        line = line.strip('\n')
        # A blank line (typically a trailing one) is not a file name; it
        # would otherwise crash below when the orthogroup number is
        # extracted from the name.
        if line:
            list_files.append(line)

## OUTPUT directories.  os.mkdir raises if a directory already exists, so
## the script never writes into the results of a previous run.
Path_OUT1 = "04_BEST_ORF_nuc"
Path_OUT2 = "04_BEST_ORF_aa"
Path_OUT3 = "05_CDS_nuc"
Path_OUT4 = "05_CDS_aa"
Path_OUT5 = "06_CDS_with_M_nuc"
Path_OUT6 = "06_CDS_with_M_aa"
for out_dir in (Path_OUT1, Path_OUT2, Path_OUT3, Path_OUT4, Path_OUT5, Path_OUT6):
    os.mkdir(out_dir)


def _write_fasta_records(out_dir, name_elems, records):
    """Write `records` as a fasta file in `out_dir`; return True if written.

    out_dir    -- existing directory receiving the file.
    name_elems -- list of file-name parts joined with '_'.  Element 3 is
                  overwritten in place with the number of records, so the
                  file name carries the number of sequences it contains.
    records    -- dict mapping a fasta header to its sequence.

    Nothing is written (and False is returned) when `records` is empty.
    """
    if not records:
        return False
    name_elems[3] = str(len(records))
    out_path = "%s/%s" % (out_dir, "_".join(name_elems))
    # `with` guarantees the handle is closed even if a write fails.
    with open(out_path, "w") as out:
        for fasta_name in records:
            out.write("%s\n" % fasta_name)
            out.write("%s\n" % records[fasta_name])
    return True


### Process each alignment file in fasta format
count_file_processed = 0
count_file_with_CDS = 0
count_file_without_CDS = 0
count_file_with_CDS_plus_M = 0

# ! : Currently, input files are named "Orthogroup_x_y_sequences.fasta", where
# x is the number of the orthogroup (not important, just here to make a
# distinct name) and y is the number of sequences/species in the group.
# These files are outputs of blastalign, where species can be removed, so y
# is recomputed for every output file.
name_elems = ["orthogroup", "0", "with", "0", "species.fasta"]

# The orthogroup number is taken from the input name rather than from a
# running counter: there will be some "holes" in the output directories
# (missing numbers), but the groups will correspond between directories.
for fasta_file in list_files:
    count_file_processed += 1
    nb_gp = fasta_file.split('_')[1]  # Keep trace of the orthogroup number
    fasta_file_path = "./%s" % fasta_file
    bash_fasta = dico(fasta_file_path)  ### DEF 1 ###
    (BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M,
     BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M) = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel)  ### DEF 4 - PART 2 - ###

    name_elems[1] = nb_gp

    ## a ## OUTPUT BESTORF_nuc (also decides whether the file "has a CDS")
    if _write_fasta_records(Path_OUT1, name_elems, BESTORF_nuc):
        count_file_with_CDS += 1
    else:
        count_file_without_CDS += 1

    ## b ## OUTPUT BESTORF_aa
    _write_fasta_records(Path_OUT2, name_elems, BESTORF_aa)

    ## c ## OUTPUT BESTORF_nuc_CODING ===> THE MOST INTERESTING!!!
    _write_fasta_records(Path_OUT3, name_elems, BESTORF_nuc_CODING)

    ## d ## OUTPUT BESTORF_aa_CODING
    _write_fasta_records(Path_OUT4, name_elems, BESTORF_aa_CODING)

    ## e ## OUTPUT BESTORF_nuc_CDS_with_M
    if _write_fasta_records(Path_OUT5, name_elems, BESTORF_nuc_CDS_with_M):
        count_file_with_CDS_plus_M += 1

    ## f ## OUTPUT BESTORF_aa_CDS_with_M
    _write_fasta_records(Path_OUT6, name_elems, BESTORF_aa_CDS_with_M)

## Print the summary.  Single-argument print(...) gives the same output
## whether it is parsed as a Python 2 statement or a Python 3 function call.
print("*************** CDS detection ***************")
print("\nFiles processed: %d" % count_file_processed)
print("\tFiles with CDS: %d" % count_file_with_CDS)
print("\t\tFiles with CDS plus M (codon start): %d" % count_file_with_CDS_plus_M)
print("\tFiles without CDS: %d \n" % count_file_without_CDS)
print("")
