comparison scripts/S01_concatenate.py @ 7:59f4b9ffd92b draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit b7a3030ea134b5dfad89b1a869db659d72d1145c
author abims-sbr
date Wed, 28 Feb 2018 10:39:19 -0500
parents 1f8d039bd241
children ff1a3a790363
comparison
Legend: equal | deleted | inserted | replaced
Left column: 6:fd707de25e4f (old) | Right column: 7:59f4b9ffd92b (new)
1 #!/usr/bin/python 1 #!/usr/bin/python
2 ## Author: Eric Fontanillas 2 ## Author: Eric Fontanillas
3 ## Last modification: 17/06/2011 3 ## Last modification: 17/06/2011
4 ## Subject: find and remove indels 4 ## Subject: find and remove indels
5
6 5
7 ############################### 6 ###############################
8 ##### DEF 0 : Dico fasta ##### 7 ##### DEF 0 : Dico fasta #####
9 ############################### 8 ###############################
10 def dico(F2): 9 def dico(F2):
11 dicoco = {} 10 dicoco = {}
12 while 1: 11 with open(F2, "r") as file:
13 next2 = F2.readline() 12 for name, query in itertools.izip_longest(*[file]*2):
14 if not next2: 13 if not name:
15 break 14 break
16 if next2[0] == ">": 15 if name[0] == ">":
17 fasta_name_query = next2[:-1] 16 fasta_name_query = name[:-1]
18 Sn = string.split(fasta_name_query, "||") 17 Sn = string.split(fasta_name_query, "||")
19 fasta_name_query = Sn[0] 18 fasta_name_query = Sn[0]
20 next3 = F2.readline() 19 fasta_seq_query = query[:-1]
21 fasta_seq_query = next3[:-1] 20 dicoco[fasta_name_query]=fasta_seq_query
22 dicoco[fasta_name_query]=fasta_seq_query 21 return dicoco
23 return(dicoco)
24 ################################################################################### 22 ###################################################################################
25 23
26 24
27 #################### 25 ####################
28 ###### DEF 11 ###### 26 ###### DEF 11 ######
43 list_genes_position=[] 41 list_genes_position=[]
44 ## 4.2 ## Concatenate 42 ## 4.2 ## Concatenate
45 for file in L_IN: 43 for file in L_IN:
46 nb_locus=nb_locus+1 44 nb_locus=nb_locus+1
47 45
48 ## a ## Open alignments 46 ## a ## Open alignments
49 file_IN = open(file, "r") 47 dico_seq = dico(file) ### DEF 0 ###
50 dico_seq = dico(file_IN) ### DEF 0 ###
51 file_IN.close()
52 ## b ## Get alignment length + genes positions for RAxML 48 ## b ## Get alignment length + genes positions for RAxML
53 key0 = dico_seq.keys()[0] 49 key0 = dico_seq.keys()[0]
54 ln = len(dico_seq[key0]) 50 ln = len(dico_seq[key0])
55 ln_concat = ln_concat + ln 51 ln_concat = ln_concat + ln
56 52
121 117
122 118
123 ####################### 119 #######################
124 ##### RUN RUN RUN ##### 120 ##### RUN RUN RUN #####
125 ####################### 121 #######################
126 import string, os, time, re, sys, zipfile 122 import string, os, time, re, sys, itertools
127 123
128 list_species = [] 124 list_species = []
129 SPECIES_ID_LIST = [] 125 SPECIES_ID_LIST = []
130 fasta = "^.*fasta$" 126 fasta = "^.*fasta$"
131 i=3 127 i=3