diff scripts/S02b_study_seq_composition_nuc.py @ 1:8de21b6eb110 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author abims-sbr
date Wed, 27 Sep 2017 10:04:08 -0400
parents 78dd6454f6f0
children 988467f963f0
line wrap: on
line diff
--- a/scripts/S02b_study_seq_composition_nuc.py	Tue May 02 04:20:51 2017 -0400
+++ b/scripts/S02b_study_seq_composition_nuc.py	Wed Sep 27 10:04:08 2017 -0400
@@ -4,11 +4,11 @@
 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and one "cold" species: Pg)
 
 
-import sys,os,zipfile,shutil,subprocess
+import sys,os,shutil,subprocess, string
 
 #############
 ### DEF 0 ###
-#############import sys,os,zipfile
+
 def simplify_fasta_name(fasta_name,LT):
     for abbreviation in LT:
         if abbreviation in fasta_name:
@@ -140,11 +140,6 @@
 ###################
 ### RUN RUN RUN ###
 ###################
-import string, os,sys,zipfile
-
-
-
-
 
 ##Create specific folders
 Path_IN_loci_NUC = "./IN_NUC"
@@ -152,20 +147,9 @@
 os.makedirs(Path_IN_loci_NUC)
 os.makedirs(outpath)
 
-
-
-
-#Check if the file is a zip or fasta file
-
-the_zip_file = zipfile.ZipFile(sys.argv[1])
-ret = the_zip_file.testzip()
-
-if ret is not None:
-    shutil.copy2(sys.argv[1], './IN_NUC/input.fasta')
-else:
-    cmd="unzip %s -d ./IN_NUC"%(sys.argv[1])
-    os.system(cmd)
-
+infiles = str.split(sys.argv[1], ",")
+for file in infiles:
+    os.system("cp %s %s" %(file, Path_IN_loci_NUC))
 
 ## 1 ## List taxa
 LT=[]
@@ -188,28 +172,28 @@
 
 ## 3 ## PathOUT
 ## 3.1 ## NUC composition
-fileOUT_NUC=open("./OUT/10_nuc_compositions.csv","w")
+fileOUT_NUC=open("./OUT/nuc_compositions.csv","w")
 fileOUT_NUC.write("LOCUS,")
 for taxa in LT:
     fileOUT_NUC.write("%s_prop_A,%s_prop_T,%s_prop_C,%s_prop_G," %(taxa,taxa,taxa,taxa))
 fileOUT_NUC.write("\n")
 
 ## 3.2 ## NUC percent_GC
-fileOUT_percent_GC=open("./OUT/11_percent_GC.csv","w")
+fileOUT_percent_GC=open("./OUT/percent_GC.csv","w")
 fileOUT_percent_GC.write("LOCUS,")
 for taxa in LT:
     fileOUT_percent_GC.write("%s_percent_GC," %(taxa))
 fileOUT_percent_GC.write("\n")
 
 ## 3.3 ## NUC percent_purine
-fileOUT_percent_purine=open("./OUT/12_percent_purine.csv","w")
+fileOUT_percent_purine=open("./OUT/percent_purine.csv","w")
 fileOUT_percent_purine.write("LOCUS,")
 for taxa in LT:
     fileOUT_percent_purine.write("%s_percent_purine," %(taxa))
 fileOUT_percent_purine.write("\n")
 
 ## 3.4 ## Purine Load
-fileOUT_Purine_Load=open("./OUT/12_Purine_Load_Indice.csv", "w")
+fileOUT_Purine_Load=open("./OUT/Purine_Load_Indice.csv", "w")
 fileOUT_Purine_Load.write("LOCUS,")
 for taxa in LT:
     fileOUT_Purine_Load.write("%s_TOTAL,%s_DIFF_GC,%s_DIFF_AT,%s_PLI_GC1000,%s_PLI_AT1000," %(taxa,taxa,taxa,taxa,taxa))
@@ -227,10 +211,8 @@
     fileOUT_percent_GC.write("%s," %locus)
     fileOUT_percent_purine.write("%s," %locus)
     fileOUT_Purine_Load.write("%s," %locus)
-    #print bash
-    for taxa in LT:
-        print taxa
-	if taxa in bash.keys():
+    
+    if taxa in bash.keys():
             seq = bash[taxa]
             percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq)   ### DEF2 ###
             TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ###
@@ -246,3 +228,4 @@
 fileOUT_percent_GC.close()
 fileOUT_percent_purine.close()
 fileOUT_Purine_Load.close()
+