Mercurial > repos > abims-sbr > concatphyl
comparison scripts/S01_concatenate.py @ 7:59f4b9ffd92b draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
author | abims-sbr |
---|---|
date | Wed, 28 Feb 2018 10:39:19 -0500 |
parents | 1f8d039bd241 |
children | ff1a3a790363 |
comparison
legend: equal | deleted | inserted | replaced
6:fd707de25e4f | 7:59f4b9ffd92b |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 ## Author: Eric Fontanillas | 2 ## Author: Eric Fontanillas |
3 ## Last modification: 17/06/2011 | 3 ## Last modification: 17/06/2011 |
4 ## Subject: find and remove indels | 4 ## Subject: find and remove indels |
5 | |
6 | 5 |
7 ############################### | 6 ############################### |
8 ##### DEF 0 : Dico fasta ##### | 7 ##### DEF 0 : Dico fasta ##### |
9 ############################### | 8 ############################### |
10 def dico(F2): | 9 def dico(F2): |
11 dicoco = {} | 10 dicoco = {} |
12 while 1: | 11 with open(F2, "r") as file: |
13 next2 = F2.readline() | 12 for name, query in itertools.izip_longest(*[file]*2): |
14 if not next2: | 13 if not name: |
15 break | 14 break |
16 if next2[0] == ">": | 15 if name[0] == ">": |
17 fasta_name_query = next2[:-1] | 16 fasta_name_query = name[:-1] |
18 Sn = string.split(fasta_name_query, "||") | 17 Sn = string.split(fasta_name_query, "||") |
19 fasta_name_query = Sn[0] | 18 fasta_name_query = Sn[0] |
20 next3 = F2.readline() | 19 fasta_seq_query = query[:-1] |
21 fasta_seq_query = next3[:-1] | 20 dicoco[fasta_name_query]=fasta_seq_query |
22 dicoco[fasta_name_query]=fasta_seq_query | 21 return dicoco |
23 return(dicoco) | |
24 ################################################################################### | 22 ################################################################################### |
25 | 23 |
26 | 24 |
27 #################### | 25 #################### |
28 ###### DEF 11 ###### | 26 ###### DEF 11 ###### |
43 list_genes_position=[] | 41 list_genes_position=[] |
44 ## 4.2 ## Concatenate | 42 ## 4.2 ## Concatenate |
45 for file in L_IN: | 43 for file in L_IN: |
46 nb_locus=nb_locus+1 | 44 nb_locus=nb_locus+1 |
47 | 45 |
48 ## a ## Open alignments | 46 ## a ## Open alignments |
49 file_IN = open(file, "r") | 47 dico_seq = dico(file) ### DEF 0 ### |
50 dico_seq = dico(file_IN) ### DEF 0 ### | |
51 file_IN.close() | |
52 ## b ## Get alignment length + genes positions for RAxML | 48 ## b ## Get alignment length + genes positions for RAxML |
53 key0 = dico_seq.keys()[0] | 49 key0 = dico_seq.keys()[0] |
54 ln = len(dico_seq[key0]) | 50 ln = len(dico_seq[key0]) |
55 ln_concat = ln_concat + ln | 51 ln_concat = ln_concat + ln |
56 | 52 |
121 | 117 |
122 | 118 |
123 ####################### | 119 ####################### |
124 ##### RUN RUN RUN ##### | 120 ##### RUN RUN RUN ##### |
125 ####################### | 121 ####################### |
126 import string, os, time, re, sys, zipfile | 122 import string, os, time, re, sys, itertools |
127 | 123 |
128 list_species = [] | 124 list_species = [] |
129 SPECIES_ID_LIST = [] | 125 SPECIES_ID_LIST = [] |
130 fasta = "^.*fasta$" | 126 fasta = "^.*fasta$" |
131 i=3 | 127 i=3 |