Mercurial > repos > abims-sbr > mutcount
comparison scripts/S02b_study_seq_composition_nuc.py @ 1:8de21b6eb110 draft
planemo upload for repository https://github.com/abims-sbr/adaptearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
| author | abims-sbr |
|---|---|
| date | Wed, 27 Sep 2017 10:04:08 -0400 |
| parents | 78dd6454f6f0 |
| children | 988467f963f0 |
comparison
equal
deleted
inserted
replaced
| 0:78dd6454f6f0 | 1:8de21b6eb110 |
|---|---|
| 2 ## Author: Eric FONTANILLAS | 2 ## Author: Eric FONTANILLAS |
| 3 ## Date: 21.12.10 | 3 ## Date: 21.12.10 |
| 4 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and one "cold" species: Pg) | 4 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and one "cold" species: Pg) |
| 5 | 5 |
| 6 | 6 |
| 7 import sys,os,zipfile,shutil,subprocess | 7 import sys,os,shutil,subprocess, string |
| 8 | 8 |
| 9 ############# | 9 ############# |
| 10 ### DEF 0 ### | 10 ### DEF 0 ### |
| 11 #############import sys,os,zipfile | 11 |
| 12 def simplify_fasta_name(fasta_name,LT): | 12 def simplify_fasta_name(fasta_name,LT): |
| 13 for abbreviation in LT: | 13 for abbreviation in LT: |
| 14 if abbreviation in fasta_name: | 14 if abbreviation in fasta_name: |
| 15 new_fasta_name = abbreviation | 15 new_fasta_name = abbreviation |
| 16 | 16 |
| 138 ############################################## | 138 ############################################## |
| 139 | 139 |
| 140 ################### | 140 ################### |
| 141 ### RUN RUN RUN ### | 141 ### RUN RUN RUN ### |
| 142 ################### | 142 ################### |
| 143 import string, os,sys,zipfile | |
| 144 | |
| 145 | |
| 146 | |
| 147 | |
| 148 | 143 |
| 149 ##Create specific folders | 144 ##Create specific folders |
| 150 Path_IN_loci_NUC = "./IN_NUC" | 145 Path_IN_loci_NUC = "./IN_NUC" |
| 151 outpath= "./OUT" | 146 outpath= "./OUT" |
| 152 os.makedirs(Path_IN_loci_NUC) | 147 os.makedirs(Path_IN_loci_NUC) |
| 153 os.makedirs(outpath) | 148 os.makedirs(outpath) |
| 154 | 149 |
| 155 | 150 infiles = str.split(sys.argv[1], ",") |
| 156 | 151 for file in infiles: |
| 157 | 152 os.system("cp %s %s" %(file, Path_IN_loci_NUC)) |
| 158 #Check if the file is a zip or fasta file | |
| 159 | |
| 160 the_zip_file = zipfile.ZipFile(sys.argv[1]) | |
| 161 ret = the_zip_file.testzip() | |
| 162 | |
| 163 if ret is not None: | |
| 164 shutil.copy2(sys.argv[1], './IN_NUC/input.fasta') | |
| 165 else: | |
| 166 cmd="unzip %s -d ./IN_NUC"%(sys.argv[1]) | |
| 167 os.system(cmd) | |
| 168 | |
| 169 | 153 |
| 170 ## 1 ## List taxa | 154 ## 1 ## List taxa |
| 171 LT=[] | 155 LT=[] |
| 172 cmd="grep '>' %s" % sys.argv[2] | 156 cmd="grep '>' %s" % sys.argv[2] |
| 173 result = subprocess.check_output(cmd, shell=True) | 157 result = subprocess.check_output(cmd, shell=True) |
| 186 Lloci_NUC = os.listdir(Path_IN_loci_NUC) | 170 Lloci_NUC = os.listdir(Path_IN_loci_NUC) |
| 187 | 171 |
| 188 | 172 |
| 189 ## 3 ## PathOUT | 173 ## 3 ## PathOUT |
| 190 ## 3.1 ## NUC composition | 174 ## 3.1 ## NUC composition |
| 191 fileOUT_NUC=open("./OUT/10_nuc_compositions.csv","w") | 175 fileOUT_NUC=open("./OUT/nuc_compositions.csv","w") |
| 192 fileOUT_NUC.write("LOCUS,") | 176 fileOUT_NUC.write("LOCUS,") |
| 193 for taxa in LT: | 177 for taxa in LT: |
| 194 fileOUT_NUC.write("%s_prop_A,%s_prop_T,%s_prop_C,%s_prop_G," %(taxa,taxa,taxa,taxa)) | 178 fileOUT_NUC.write("%s_prop_A,%s_prop_T,%s_prop_C,%s_prop_G," %(taxa,taxa,taxa,taxa)) |
| 195 fileOUT_NUC.write("\n") | 179 fileOUT_NUC.write("\n") |
| 196 | 180 |
| 197 ## 3.2 ## NUC percent_GC | 181 ## 3.2 ## NUC percent_GC |
| 198 fileOUT_percent_GC=open("./OUT/11_percent_GC.csv","w") | 182 fileOUT_percent_GC=open("./OUT/percent_GC.csv","w") |
| 199 fileOUT_percent_GC.write("LOCUS,") | 183 fileOUT_percent_GC.write("LOCUS,") |
| 200 for taxa in LT: | 184 for taxa in LT: |
| 201 fileOUT_percent_GC.write("%s_percent_GC," %(taxa)) | 185 fileOUT_percent_GC.write("%s_percent_GC," %(taxa)) |
| 202 fileOUT_percent_GC.write("\n") | 186 fileOUT_percent_GC.write("\n") |
| 203 | 187 |
| 204 ## 3.3 ## NUC percent_purine | 188 ## 3.3 ## NUC percent_purine |
| 205 fileOUT_percent_purine=open("./OUT/12_percent_purine.csv","w") | 189 fileOUT_percent_purine=open("./OUT/percent_purine.csv","w") |
| 206 fileOUT_percent_purine.write("LOCUS,") | 190 fileOUT_percent_purine.write("LOCUS,") |
| 207 for taxa in LT: | 191 for taxa in LT: |
| 208 fileOUT_percent_purine.write("%s_percent_purine," %(taxa)) | 192 fileOUT_percent_purine.write("%s_percent_purine," %(taxa)) |
| 209 fileOUT_percent_purine.write("\n") | 193 fileOUT_percent_purine.write("\n") |
| 210 | 194 |
| 211 ## 3.4 ## Purine Load | 195 ## 3.4 ## Purine Load |
| 212 fileOUT_Purine_Load=open("./OUT/12_Purine_Load_Indice.csv", "w") | 196 fileOUT_Purine_Load=open("./OUT/Purine_Load_Indice.csv", "w") |
| 213 fileOUT_Purine_Load.write("LOCUS,") | 197 fileOUT_Purine_Load.write("LOCUS,") |
| 214 for taxa in LT: | 198 for taxa in LT: |
| 215 fileOUT_Purine_Load.write("%s_TOTAL,%s_DIFF_GC,%s_DIFF_AT,%s_PLI_GC1000,%s_PLI_AT1000," %(taxa,taxa,taxa,taxa,taxa)) | 199 fileOUT_Purine_Load.write("%s_TOTAL,%s_DIFF_GC,%s_DIFF_AT,%s_PLI_GC1000,%s_PLI_AT1000," %(taxa,taxa,taxa,taxa,taxa)) |
| 216 fileOUT_Purine_Load.write("\n") | 200 fileOUT_Purine_Load.write("\n") |
| 217 | 201 |
| 225 | 209 |
| 226 fileOUT_NUC.write("%s," %locus) | 210 fileOUT_NUC.write("%s," %locus) |
| 227 fileOUT_percent_GC.write("%s," %locus) | 211 fileOUT_percent_GC.write("%s," %locus) |
| 228 fileOUT_percent_purine.write("%s," %locus) | 212 fileOUT_percent_purine.write("%s," %locus) |
| 229 fileOUT_Purine_Load.write("%s," %locus) | 213 fileOUT_Purine_Load.write("%s," %locus) |
| 230 #print bash | 214 |
| 231 for taxa in LT: | 215 if taxa in bash.keys(): |
| 232 print taxa | |
| 233 if taxa in bash.keys(): | |
| 234 seq = bash[taxa] | 216 seq = bash[taxa] |
| 235 percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq) ### DEF2 ### | 217 percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq) ### DEF2 ### |
| 236 TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ### | 218 TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ### |
| 237 fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G)) | 219 fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G)) |
| 238 fileOUT_percent_GC.write("%.5f," %percent_GC) | 220 fileOUT_percent_GC.write("%.5f," %percent_GC) |
| 244 fileOUT_Purine_Load.write("\n") | 226 fileOUT_Purine_Load.write("\n") |
| 245 fileOUT_NUC.close() | 227 fileOUT_NUC.close() |
| 246 fileOUT_percent_GC.close() | 228 fileOUT_percent_GC.close() |
| 247 fileOUT_percent_purine.close() | 229 fileOUT_percent_purine.close() |
| 248 fileOUT_Purine_Load.close() | 230 fileOUT_Purine_Load.close() |
| 231 |
