Mercurial > repos > abims-sbr > concatphyl
annotate scripts/S01_concatenate.py @ 1:f181bd945a6c draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
author | lecorguille |
---|---|
date | Thu, 13 Apr 2017 09:48:47 -0400 |
parents | |
children | 1f8d039bd241 |
rev | line source |
---|---|
1
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
1 #!/usr/bin/python |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
2 ## Author: Eric Fontanillas |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
3 ## Last modification: 17/06/2011 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
4 ## Subject: concatenate per-locus alignments into one sequence per species |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
5 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
6 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
7 ############################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
8 ##### DEF 0 : Dico fasta ##### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
9 ############################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def dico(F2):
    """Parse an open FASTA file object into a ``{name: sequence}`` dict.

    Only two-line records are understood: a header line starting with
    ">" followed by exactly one sequence line.  Lines that are not
    headers (and are not consumed as the sequence of a header) are ignored.

    Args:
        F2: an open text file (anything with a ``readline()`` method).

    Returns:
        dict mapping each header -- leading ">" kept, everything from the
        first "||" onwards dropped -- to the sequence line that follows it.
        If the same name occurs twice, the last record wins.
    """
    dicoco = {}
    while True:
        header_line = F2.readline()
        if not header_line:
            # readline() returns "" only at end of file.
            break
        if header_line[0] != ">":
            continue
        # Strip the line terminator explicitly instead of chopping the last
        # character: a final line without "\n" must not lose a residue, and
        # CRLF files must not leave a stray "\r" in names or sequences.
        fasta_name_query = header_line.rstrip("\r\n").split("||")[0]
        # The line right after the header is taken verbatim as the sequence.
        fasta_seq_query = F2.readline().rstrip("\r\n")
        dicoco[fasta_name_query] = fasta_seq_query
    return dicoco
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
24 ################################################################################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
25 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
26 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
27 #################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
28 ###### DEF 11 ###### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
29 #################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
30 ## Concatenate sequences |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
31 ########################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def concatenate(folder_with_loci, SPECIES_ID_LIST):
    """Concatenate every locus alignment into one sequence per species.

    The alignment file names are read from the module-level list ``L_IN``
    (it must be defined before this function is called); each name is
    opened relative to ``folder_with_loci`` and parsed with ``dico()``.
    A species that has no sequence in a locus is padded with "-" over
    the length of that locus.

    Args:
        folder_with_loci: directory containing the files named in ``L_IN``.
        SPECIES_ID_LIST: iterable of species IDs; an ID is matched against
            characters 1-2 of each FASTA name (the two characters right
            after ">").  Repeated IDs are treated as a single species.

    Returns:
        Tuple ``(bash_concat, ln_concat, nb_locus, list_genes_position)``:
        ``bash_concat`` maps species ID -> concatenated sequence,
        ``ln_concat`` is the summed locus length, ``nb_locus`` the number of
        loci processed, and ``list_genes_position`` a list of
        ``[locus_name, "start-end"]`` pairs (1-based, inclusive).

    Raises:
        ValueError: if an alignment file contains no FASTA record.
    """
    # Collect the per-locus pieces in lists and join once at the end; using
    # a dict keyed by ID also guarantees each species is extended exactly
    # once per locus even if SPECIES_ID_LIST contains the same ID twice.
    chunks = dict((species_ID, []) for species_ID in SPECIES_ID_LIST)

    ln_concat = 0
    nb_locus = 0
    pos = 1
    list_genes_position = []

    for locus_file in L_IN:
        # a ## Read the alignment (the handle is closed even on error)
        with open("%s/%s" % (folder_with_loci, locus_file), "r") as file_IN:
            dico_seq = dico(file_IN)  ### DEF 0 ###
        if not dico_seq:
            raise ValueError(
                "no FASTA record found in alignment '%s/%s'"
                % (folder_with_loci, locus_file))
        nb_locus += 1

        # b ## Alignment length + gene coordinates for the partition file.
        # The length is taken from one (arbitrary) sequence of the locus;
        # presumably all sequences of an alignment share it -- TODO confirm.
        ln = len(next(iter(dico_seq.values())))
        ln_concat += ln
        pos_end = pos + ln - 1
        # [:-6] drops the last six characters of the file name
        # (presumably a ".fasta" extension -- verify against the inputs).
        list_genes_position.append(
            [locus_file[:-6], "%d-%d" % (pos, pos_end)])
        pos = pos_end + 1

        # c ## Index the sequences of this locus by species ID
        seq_by_ID = {}
        for fasta_name, seq in dico_seq.items():
            seq_by_ID[fasta_name[1:3]] = seq

        # d ## Extend every species, padding the absent ones with gaps
        empty_seq = "-" * ln
        for ID, parts in chunks.items():
            parts.append(seq_by_ID.get(ID, empty_seq))

    bash_concat = dict((ID, "".join(parts)) for ID, parts in chunks.items())
    return (bash_concat, ln_concat, nb_locus, list_genes_position)
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
91 #################################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
92 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
93 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
94 ######################################## |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
95 ##### DEF 12 : get codon position ##### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
96 ######################################## |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
def get_codon_position(seq_inORF):
    """Split an in-frame sequence into its codon-position subsequences.

    The sequence is read as consecutive codons starting at index 0.  A
    trailing incomplete codon (when the length is not a multiple of 3) is
    ignored instead of raising IndexError.

    Args:
        seq_inORF: the sequence as a string, first character = codon
            position 1.

    Returns:
        Tuple ``(seq_pos1, seq_pos2, seq_pos12, seq_pos3)``: the characters
        at codon positions 1, 2, 1+2 (interleaved, in codon order) and 3.
    """
    # Keep only whole codons so the three position strings stay in step.
    usable = len(seq_inORF) - len(seq_inORF) % 3
    in_frame = seq_inORF[:usable]

    seq_pos1 = in_frame[0::3]
    seq_pos2 = in_frame[1::3]
    seq_pos3 = in_frame[2::3]
    # Positions 1 and 2 of each codon, codon after codon.
    seq_pos12 = "".join(p1 + p2 for p1, p2 in zip(seq_pos1, seq_pos2))

    return (seq_pos1, seq_pos2, seq_pos12, seq_pos3)
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
119 ############################################################################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
120 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
121 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
122 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
123 ####################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
124 ##### RUN RUN RUN ##### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
125 ####################### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
126 import string, os, time, re, sys, zipfile |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
127 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
128 list_species = [] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
129 SPECIES_ID_LIST = [] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
130 fasta = "^.*fasta$" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
131 i=3 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
132 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
133 ## add file to list_species |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
134 zfile = zipfile.ZipFile(sys.argv[1]) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
135 for name in zfile.namelist() : |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
136 list_species.append(name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
137 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
138 ## in SPECIES_ID_LIST, only the 2 first letters of name of species |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
139 for name in list_species : |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
140 name = name[:2] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
141 SPECIES_ID_LIST.append(name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
142 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
143 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
144 ### 1 ### Proteic |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
145 if sys.argv[2] == "proteic" : |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
146 os.mkdir("02_CDS_No_Missing_Data_aa") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
147 zfile_nuc = zipfile.ZipFile(sys.argv[3]) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
148 for name in zfile_nuc.namelist() : |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
149 zfile_nuc.extract(name, "./02_CDS_No_Missing_Data_aa") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
150 path_IN = "./02_CDS_No_Missing_Data_aa" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
151 L_IN = os.listdir(path_IN) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
152 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
153 OUT1 = open("02_Concatenation_aa.fas", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
154 OUT2 = open("02_Concatenation_aa.phy", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
155 OUT3 = open("02_Concatenation_aa.nex", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
156 OUT_PARTITION_gene_AA = open("06_partitions_gene_AA","w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
157 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
158 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
159 ## Get bash with concatenation |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
160 bash_concatenation, ln, nb_locus,list_genes_position= concatenate(path_IN, SPECIES_ID_LIST) ### DEF 11 ## |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
161 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
162 ## Write gene AA partition file for RAxML |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
163 for sublist in list_genes_position: |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
164 name = sublist[0] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
165 positions=sublist[1] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
166 OUT_PARTITION_gene_AA.write("DNA,%s=%s\n"%(name,positions)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
167 OUT_PARTITION_gene_AA.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
168 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
169 ## Get "ntax" for NEXUS HEADER |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
170 nb_taxa = len(bash_concatenation.keys()) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
171 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
172 print "******************** CONCATENATION ********************\n" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
173 print "Process amino-acid concatenation:" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
174 print "\tNumber of taxa aligned = %d" %nb_taxa |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
175 print "\tNumber of loci concatenated = %d\n" %nb_locus |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
176 print "\tTotal length of the concatenated sequences = %d" %ln |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
177 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
178 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
179 ## Print NEXUS HEADER: |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
180 OUT3.write("#NEXUS\n\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
181 OUT3.write("Begin data;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
182 OUT3.write("\tDimensions ntax=%d nchar=%d;\n" %(nb_taxa, ln)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
183 OUT3.write("\tFormat datatype=aa gap=-;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
184 OUT3.write("\tMatrix\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
185 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
186 ## Print PHYLIP HEADER: |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
187 OUT2.write(" %d %d\n" %(nb_taxa, ln)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
188 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
189 ## 3.5 ## Print outputs |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
190 for seq_name in bash_concatenation.keys(): |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
191 seq = bash_concatenation[seq_name] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
192 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
193 ## Filtering the sequence in case of remaining "?" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
194 seq = string.replace(seq, "?", "-") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
195 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
196 #print seq FASTA FORMAT |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
197 OUT1.write(">%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
198 OUT1.write("%s\n" %seq) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
199 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
200 #print seq PHYLIP FORMAT |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
201 OUT2.write("%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
202 OUT2.write("%s\n" %seq) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
203 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
204 #print seq NEXUS FORMAT |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
205 OUT3.write("%s" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
206 OUT3.write(" %s\n" %seq) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
207 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
208 OUT3.write("\t;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
209 OUT3.write("End;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
210 OUT1.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
211 OUT2.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
212 OUT2.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
213 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
214 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
215 ### 2 ### Nucleic |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
216 elif sys.argv[2] == "nucleic" : |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
217 os.mkdir("02_CDS_No_Missing_Data_nuc") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
218 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
219 zfile_nuc = zipfile.ZipFile(sys.argv[3]) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
220 for name in zfile_nuc.namelist() : |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
221 zfile_nuc.extract(name, "./02_CDS_No_Missing_Data_nuc") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
222 path_IN = "./02_CDS_No_Missing_Data_nuc" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
223 L_IN = os.listdir(path_IN) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
224 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
225 OUT1 = open("03_Concatenation_nuc.fas", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
226 OUT2 = open("03_Concatenation_nuc.phy", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
227 OUT3 = open("03_Concatenation_nuc.nex", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
228 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
229 OUT1_pos12 = open("03_Concatenation_pos12_nuc.fas", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
230 OUT2_pos12 = open("03_Concatenation_pos12_nuc.phy", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
231 OUT3_pos12 = open("03_Concatenation_pos12_nuc.nex", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
232 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
233 OUT1_pos3 = open("03_Concatenation_pos3_nuc.fas", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
234 OUT2_pos3 = open("03_Concatenation_pos3_nuc.phy", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
235 OUT3_pos3 = open("03_Concatenation_pos3_nuc.nex", "w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
236 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
237 OUT_PARTITION_codon_12_3 = open("05_partitions_codon12_3","w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
238 OUT_PARTITION_gene_NUC = open("05_partitions_gene_NUC","w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
239 OUT_PARTITION_gene_PLUS_codon_12_3 = open("05_partitions_gene_PLUS_codon12_3","w") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
240 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
241 ## Build the dict ("bash") mapping each sequence name to its concatenated sequence |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
242 bash_concatenation, ln, nb_locus, list_genes_position = concatenate(path_IN, SPECIES_ID_LIST) ### DEF 11 ## |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
243 ln_12 = ln/3*2 ### length of the alignment when only the 2 first codon position |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
244 ln_3 = ln/3 ### length of the alignment when only the third codon position |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
245 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
246 ## Write partition files for RAxML subsequent runs |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
247 # a # Codon partition |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
248 OUT_PARTITION_codon_12_3.write("DNA, p1=1-%d\\3,2-%d\\3\n" %(ln, ln)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
249 OUT_PARTITION_codon_12_3.write("DNA, p2=3-%d\\3\n" %(ln)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
250 OUT_PARTITION_codon_12_3.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
251 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
252 # b # Gene partition |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
253 for sublist in list_genes_position: |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
254 name=sublist[0] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
255 positions=sublist[1] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
256 OUT_PARTITION_gene_NUC.write("DNA,%s=%s\n"%(name,positions)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
257 OUT_PARTITION_gene_NUC.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
258 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
259 # c # Mixed partition (codon + gene) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
260 for sublist in list_genes_position: |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
261 name = sublist[0] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
262 positions = sublist[1] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
263 S1 = string.split(positions, "-") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
264 pos_start1 = string.atoi(S1[0]) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
265 pos_end = string.atoi(S1[1]) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
266 pos_start2=pos_start1+1 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
267 pos_start3=pos_start2+1 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
268 partition1 = "DNA, %s_1=%d-%d\\3,%d-%d\\3\n" %(name,pos_start1, pos_end, pos_start2, pos_end) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
269 partition2 = "DNA, %s_2=%d-%d\\3\n" %(name,pos_start3, pos_end) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
270 OUT_PARTITION_gene_PLUS_codon_12_3.write(partition1) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
271 OUT_PARTITION_gene_PLUS_codon_12_3.write(partition2) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
272 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
273 OUT_PARTITION_gene_PLUS_codon_12_3.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
274 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
275 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
276 ## Get "ntax" for NEXUS HEADER |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
277 nb_taxa = len(bash_concatenation.keys()) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
278 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
279 print "******************** CONCATENATION ********************\n" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
280 print "Process nucleotides concatenation:" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
281 print "\tNumber of taxa aligned = %d" %nb_taxa |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
282 print "\tNumber of loci concatenated = %d\n" %nb_locus |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
283 print "\tTotal length of the concatenated sequences [All codon positions] = %d" %ln |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
284 print "\t\tTotal length of the concatenated sequences [Codon positions 1 & 2] = %d" %ln_12 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
285 print "\t\tTotal length of the concatenated sequences [Codon position 3] = %d" %ln_3 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
286 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
287 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
288 ## Print NEXUS HEADER: |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
289 OUT3.write("#NEXUS\n\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
290 OUT3.write("Begin data;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
291 OUT3.write("\tDimensions ntax=%d nchar=%d;\n" %(nb_taxa, ln)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
292 OUT3.write("\tFormat datatype=dna gap=-;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
293 OUT3.write("\tMatrix\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
294 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
295 OUT3_pos12.write("#NEXUS\n\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
296 OUT3_pos12.write("Begin data;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
297 OUT3_pos12.write("\tDimensions ntax=%d nchar=%d;\n" %(nb_taxa, ln_12)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
298 OUT3_pos12.write("\tFormat datatype=dna gap=-;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
299 OUT3_pos12.write("\tMatrix\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
300 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
301 OUT3_pos3.write("#NEXUS\n\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
302 OUT3_pos3.write("Begin data;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
303 OUT3_pos3.write("\tDimensions ntax=%d nchar=%d;\n" %(nb_taxa, ln_3)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
304 OUT3_pos3.write("\tFormat datatype=dna gap=-;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
305 OUT3_pos3.write("\tMatrix\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
306 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
307 ## Print PHYLIP HEADER: |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
308 OUT2.write(" %d %d\n" %(nb_taxa, ln)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
309 OUT2_pos12.write(" %d %d\n" %(nb_taxa, ln_12)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
310 OUT2_pos3.write(" %d %d\n" %(nb_taxa, ln_3)) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
311 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
312 ## Print outputs |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
313 for seq_name in bash_concatenation.keys(): |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
314 seq = bash_concatenation[seq_name] |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
315 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
316 ## Replace any remaining "?" in the sequence with the gap character "-" |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
317 seq = string.replace(seq, "?", "-") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
318 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
319 ## Get the different codon partitions |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
320 seq_pos1, seq_pos2, seq_pos12, seq_pos3 = get_codon_position(seq) ### DEF 12 ### |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
321 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
322 #print seq FASTA FORMAT |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
323 OUT1.write(">%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
324 OUT1.write("%s\n" %seq) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
325 OUT1_pos12.write(">%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
326 OUT1_pos12.write("%s\n" %seq_pos12) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
327 OUT1_pos3.write(">%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
328 OUT1_pos3.write("%s\n" %seq_pos3) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
329 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
330 #print seq PHYLIP FORMAT |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
331 OUT2.write("%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
332 OUT2.write("%s\n" %seq) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
333 OUT2_pos12.write("%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
334 OUT2_pos12.write("%s\n" %seq_pos12) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
335 OUT2_pos3.write("%s\n" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
336 OUT2_pos3.write("%s\n" %seq_pos3) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
337 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
338 #print seq NEXUS FORMAT |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
339 OUT3.write("%s" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
340 OUT3.write(" %s\n" %seq) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
341 OUT3_pos12.write("%s" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
342 OUT3_pos12.write(" %s\n" %seq_pos12) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
343 OUT3_pos3.write("%s" %seq_name) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
344 OUT3_pos3.write(" %s\n" %seq_pos3) |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
345 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
346 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
347 OUT3.write("\t;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
348 OUT3.write("End;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
349 OUT3_pos12.write("\t;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
350 OUT3_pos12.write("End;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
351 OUT3_pos3.write("\t;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
352 OUT3_pos3.write("End;\n") |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
353 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
354 OUT1.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
355 OUT2.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
356 OUT3.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
357 OUT1_pos12.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
358 OUT2_pos12.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
359 OUT3_pos12.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
360 OUT1_pos3.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
361 OUT2_pos3.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
362 OUT3_pos3.close() |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
363 |
f181bd945a6c
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
lecorguille
parents:
diff
changeset
|
364 print "\n\n\n******************** RAxML RUN ********************\n" |