comparison scripts/S02b_study_seq_composition_nuc.py @ 1:8de21b6eb110 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author abims-sbr
date Wed, 27 Sep 2017 10:04:08 -0400
parents 78dd6454f6f0
children 988467f963f0
comparison
equal deleted inserted replaced
0:78dd6454f6f0 1:8de21b6eb110
2 ## Author: Eric FONTANILLAS 2 ## Author: Eric FONTANILLAS
3 ## Date: 21.12.10 3 ## Date: 21.12.10
4 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and one "cold" species: Pg) 4 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and one "cold" species: Pg)
5 5
6 6
7 import sys,os,zipfile,shutil,subprocess 7 import sys,os,shutil,subprocess, string
8 8
9 ############# 9 #############
10 ### DEF 0 ### 10 ### DEF 0 ###
11 #############import sys,os,zipfile 11
12 def simplify_fasta_name(fasta_name,LT): 12 def simplify_fasta_name(fasta_name,LT):
13 for abbreviation in LT: 13 for abbreviation in LT:
14 if abbreviation in fasta_name: 14 if abbreviation in fasta_name:
15 new_fasta_name = abbreviation 15 new_fasta_name = abbreviation
16 16
138 ############################################## 138 ##############################################
139 139
140 ################### 140 ###################
141 ### RUN RUN RUN ### 141 ### RUN RUN RUN ###
142 ################### 142 ###################
143 import string, os,sys,zipfile
144
145
146
147
148 143
149 ##Create specific folders 144 ##Create specific folders
150 Path_IN_loci_NUC = "./IN_NUC" 145 Path_IN_loci_NUC = "./IN_NUC"
151 outpath= "./OUT" 146 outpath= "./OUT"
152 os.makedirs(Path_IN_loci_NUC) 147 os.makedirs(Path_IN_loci_NUC)
153 os.makedirs(outpath) 148 os.makedirs(outpath)
154 149
155 150 infiles = str.split(sys.argv[1], ",")
156 151 for file in infiles:
157 152 os.system("cp %s %s" %(file, Path_IN_loci_NUC))
158 #Check if the file is a zip or fasta file
159
160 the_zip_file = zipfile.ZipFile(sys.argv[1])
161 ret = the_zip_file.testzip()
162
163 if ret is not None:
164 shutil.copy2(sys.argv[1], './IN_NUC/input.fasta')
165 else:
166 cmd="unzip %s -d ./IN_NUC"%(sys.argv[1])
167 os.system(cmd)
168
169 153
170 ## 1 ## List taxa 154 ## 1 ## List taxa
171 LT=[] 155 LT=[]
172 cmd="grep '>' %s" % sys.argv[2] 156 cmd="grep '>' %s" % sys.argv[2]
173 result = subprocess.check_output(cmd, shell=True) 157 result = subprocess.check_output(cmd, shell=True)
186 Lloci_NUC = os.listdir(Path_IN_loci_NUC) 170 Lloci_NUC = os.listdir(Path_IN_loci_NUC)
187 171
188 172
189 ## 3 ## PathOUT 173 ## 3 ## PathOUT
190 ## 3.1 ## NUC composition 174 ## 3.1 ## NUC composition
191 fileOUT_NUC=open("./OUT/10_nuc_compositions.csv","w") 175 fileOUT_NUC=open("./OUT/nuc_compositions.csv","w")
192 fileOUT_NUC.write("LOCUS,") 176 fileOUT_NUC.write("LOCUS,")
193 for taxa in LT: 177 for taxa in LT:
194 fileOUT_NUC.write("%s_prop_A,%s_prop_T,%s_prop_C,%s_prop_G," %(taxa,taxa,taxa,taxa)) 178 fileOUT_NUC.write("%s_prop_A,%s_prop_T,%s_prop_C,%s_prop_G," %(taxa,taxa,taxa,taxa))
195 fileOUT_NUC.write("\n") 179 fileOUT_NUC.write("\n")
196 180
197 ## 3.2 ## NUC percent_GC 181 ## 3.2 ## NUC percent_GC
198 fileOUT_percent_GC=open("./OUT/11_percent_GC.csv","w") 182 fileOUT_percent_GC=open("./OUT/percent_GC.csv","w")
199 fileOUT_percent_GC.write("LOCUS,") 183 fileOUT_percent_GC.write("LOCUS,")
200 for taxa in LT: 184 for taxa in LT:
201 fileOUT_percent_GC.write("%s_percent_GC," %(taxa)) 185 fileOUT_percent_GC.write("%s_percent_GC," %(taxa))
202 fileOUT_percent_GC.write("\n") 186 fileOUT_percent_GC.write("\n")
203 187
204 ## 3.3 ## NUC percent_purine 188 ## 3.3 ## NUC percent_purine
205 fileOUT_percent_purine=open("./OUT/12_percent_purine.csv","w") 189 fileOUT_percent_purine=open("./OUT/percent_purine.csv","w")
206 fileOUT_percent_purine.write("LOCUS,") 190 fileOUT_percent_purine.write("LOCUS,")
207 for taxa in LT: 191 for taxa in LT:
208 fileOUT_percent_purine.write("%s_percent_purine," %(taxa)) 192 fileOUT_percent_purine.write("%s_percent_purine," %(taxa))
209 fileOUT_percent_purine.write("\n") 193 fileOUT_percent_purine.write("\n")
210 194
211 ## 3.4 ## Purine Load 195 ## 3.4 ## Purine Load
212 fileOUT_Purine_Load=open("./OUT/12_Purine_Load_Indice.csv", "w") 196 fileOUT_Purine_Load=open("./OUT/Purine_Load_Indice.csv", "w")
213 fileOUT_Purine_Load.write("LOCUS,") 197 fileOUT_Purine_Load.write("LOCUS,")
214 for taxa in LT: 198 for taxa in LT:
215 fileOUT_Purine_Load.write("%s_TOTAL,%s_DIFF_GC,%s_DIFF_AT,%s_PLI_GC1000,%s_PLI_AT1000," %(taxa,taxa,taxa,taxa,taxa)) 199 fileOUT_Purine_Load.write("%s_TOTAL,%s_DIFF_GC,%s_DIFF_AT,%s_PLI_GC1000,%s_PLI_AT1000," %(taxa,taxa,taxa,taxa,taxa))
216 fileOUT_Purine_Load.write("\n") 200 fileOUT_Purine_Load.write("\n")
217 201
225 209
226 fileOUT_NUC.write("%s," %locus) 210 fileOUT_NUC.write("%s," %locus)
227 fileOUT_percent_GC.write("%s," %locus) 211 fileOUT_percent_GC.write("%s," %locus)
228 fileOUT_percent_purine.write("%s," %locus) 212 fileOUT_percent_purine.write("%s," %locus)
229 fileOUT_Purine_Load.write("%s," %locus) 213 fileOUT_Purine_Load.write("%s," %locus)
230 #print bash 214
231 for taxa in LT: 215 if taxa in bash.keys():
232 print taxa
233 if taxa in bash.keys():
234 seq = bash[taxa] 216 seq = bash[taxa]
235 percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq) ### DEF2 ### 217 percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq) ### DEF2 ###
236 TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ### 218 TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ###
237 fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G)) 219 fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G))
238 fileOUT_percent_GC.write("%.5f," %percent_GC) 220 fileOUT_percent_GC.write("%.5f," %percent_GC)
244 fileOUT_Purine_Load.write("\n") 226 fileOUT_Purine_Load.write("\n")
245 fileOUT_NUC.close() 227 fileOUT_NUC.close()
246 fileOUT_percent_GC.close() 228 fileOUT_percent_GC.close()
247 fileOUT_percent_purine.close() 229 fileOUT_percent_purine.close()
248 fileOUT_Purine_Load.close() 230 fileOUT_Purine_Load.close()
231