diff scripts/S01b_study_seq_composition_aa.py @ 1:8de21b6eb110 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author abims-sbr
date Wed, 27 Sep 2017 10:04:08 -0400
parents 78dd6454f6f0
children 988467f963f0
line wrap: on
line diff
--- a/scripts/S01b_study_seq_composition_aa.py	Tue May 02 04:20:51 2017 -0400
+++ b/scripts/S01b_study_seq_composition_aa.py	Wed Sep 27 10:04:08 2017 -0400
@@ -3,14 +3,13 @@
 ## Author: Eric FONTANILLAS
 ## Date: 21.12.10
 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp)
-import sys, os
+import sys,os,shutil,subprocess,string
 script_path = os.path.dirname(sys.argv[0])
 
 #############
 ### DEF 0 ###
 #############
-def simplify_fasta_name(fasta_name,LT):
-
+def simplify_fasta_name(fasta_name,LT):    
     for abbreviation in LT:
         if abbreviation in fasta_name:
             new_fasta_name = abbreviation
@@ -421,7 +420,7 @@
 ###################
 ### RUN RUN RUN ###
 ###################
-import sys,os,zipfile,shutil,subprocess,string
+
 
 ##Create specific folders
 Path_IN_loci_NUC = "./IN_AA"
@@ -429,19 +428,9 @@
 os.makedirs(Path_IN_loci_NUC)
 os.makedirs(outpath)
 
-
-#Check if the file is a zip or fasta file
-
-the_zip_file = zipfile.ZipFile(sys.argv[1])
-ret = the_zip_file.testzip()
-
-if ret is not None:
-    shutil.copy2(sys.argv[1], './IN_AA/input.fasta')
-else:
-    cmd="unzip %s -d ./IN_AA"%(sys.argv[1])
-    os.system(cmd)
-
-
+infiles = str.split(sys.argv[1], ",")
+for file in infiles:
+    os.system("cp %s %s" %(file, Path_IN_loci_NUC))
 
 ## 1 ## List taxa
 LT=[]
@@ -456,7 +445,7 @@
 
 
 ## 2 ## PathIN
-fileIN_properties = open("%s/01_AminoAcid_Properties2.csv"%(script_path), "r")
+fileIN_properties = open("amino_acid_properties.csv", "r")
 Path_IN_loci_AA = "./IN_AA"
 #Path_IN_loci_AA = "02_CDS_No_Missing_Data_aa_CDS_withM"
 Lloci_AA = os.listdir(Path_IN_loci_AA)
@@ -464,14 +453,14 @@
 ## 3 ## PathOUT
 
 ## 3.1 ## PROT composition
-fileOUT_PROT_ALL=open("./OUT/13_prot_compositions_All_AA.csv","w")
+fileOUT_PROT_ALL=open("./OUT/prot_compositions_All_AA.csv","w")
 fileOUT_PROT_ALL.write("LOCUS,")
 for taxa in LT:
     fileOUT_PROT_ALL.write("%s_prop_K,%s_prop_R,%s_prop_A,%s_prop_F,%s_prop_I,%s_prop_L,%s_prop_M,%s_prop_V,%s_prop_W,%s_prop_N,%s_prop_Q,%s_prop_S,%s_prop_T,%s_prop_H,%s_prop_Y,%s_prop_C,%s_prop_D,%s_prop_E,%s_prop_P,%s_prop_G," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa))
 fileOUT_PROT_ALL.write("\n")
 
 ## 3.2 ## PROT IVYWREL
-fileOUT_IVYWREL=open("./OUT/14_IVYWREL.csv","w")
+fileOUT_IVYWREL=open("./OUT/IVYWREL.csv","w")
 fileOUT_IVYWREL.write("LOCUS,")
 for taxa in LT:
     fileOUT_IVYWREL.write("%s_count_IVYWREL,%s_prop_IVYWREL," %(taxa,taxa))
@@ -479,14 +468,14 @@
 
 
 ## 3.3 ## PROT ERK_DNQTSHA
-fileOUT_ERK_DNQTSH=open("./OUT/15_ERK_DNQTSH.csv","w")
+fileOUT_ERK_DNQTSH=open("./OUT/ERK_DNQTSH.csv","w")
 fileOUT_ERK_DNQTSH.write("LOCUS,")
 for taxa in LT:
     fileOUT_ERK_DNQTSH.write("%s_count_ERK,%s_prop_ERK,%s_count_DNQTSH,%s_prop_DNQTSH,%s_ratio_ERK_vs_DNQTSH," %(taxa,taxa,taxa,taxa,taxa))
 fileOUT_ERK_DNQTSH.write("\n")
 
 ## 3.4 ## PROT EK_QH
-fileOUT_EK_QH=open("./OUT/16_EK_QH.csv","w")
+fileOUT_EK_QH=open("./OUT/EK_QH.csv","w")
 fileOUT_EK_QH.write("LOCUS,")
 for taxa in LT:
     fileOUT_EK_QH.write("%s_count_EK,%s_prop_EK,%s_count_QH,%s_prop_QH,%s_ratio_EK_vs_QH," %(taxa,taxa,taxa,taxa,taxa))
@@ -494,7 +483,7 @@
 
 
 ## 3.5 ## PROT FYMINK_GARP
-fileOUT_FYMINK_GARP=open("./OUT/17_FYMINK_GARP.csv","w")
+fileOUT_FYMINK_GARP=open("./OUT/FYMINK_GARP.csv","w")
 fileOUT_FYMINK_GARP.write("LOCUS,")
 for taxa in LT:
     fileOUT_FYMINK_GARP.write("%s_count_FYMINK,%s_prop_FYMINK,%s_count_GARP,%s_prop_GARP," %(taxa,taxa,taxa,taxa))
@@ -502,56 +491,56 @@
 
 
 ## 3.6 ## PROT AVLIMFYW
-fileOUT_AVLIMFYW=open("./OUT/18_AVLIMFYW.csv","w")
+fileOUT_AVLIMFYW=open("./OUT/AVLIMFYW.csv","w")
 fileOUT_AVLIMFYW.write("LOCUS,")
 for taxa in LT:
     fileOUT_AVLIMFYW.write("%s_count_AVLIMFYW,%s_prop_AVLIMFYW,%s_count_AVLIM,%s_prop_AVLIM,%s_count_FYW,%s_prop_FYW," %(taxa,taxa,taxa,taxa,taxa,taxa))
 fileOUT_AVLIMFYW.write("\n")
 
 ## 3.7 ## PROT STNQ
-fileOUT_STNQ=open("./OUT/19_STNQ.csv","w")
+fileOUT_STNQ=open("./OUT/STNQ.csv","w")
 fileOUT_STNQ.write("LOCUS,")
 for taxa in LT:
     fileOUT_STNQ.write("%s_count_STNQ,%s_prop_STNQ," %(taxa,taxa))
 fileOUT_STNQ.write("\n")
 
 ## 3.8 ## PROT RHKDE
-fileOUT_RHKDE=open("./OUT/20_RHKDE.csv","w")
+fileOUT_RHKDE=open("./OUT/RHKDE.csv","w")
 fileOUT_RHKDE.write("LOCUS,")
 for taxa in LT:
     fileOUT_RHKDE.write("%s_count_RHKDE,%s_prop_RHKDE,%s_count_RHK,%s_prop_RHK,%s_count_DE,%s_prop_DE," %(taxa,taxa,taxa,taxa,taxa,taxa))
 fileOUT_RHKDE.write("\n")
 
 ## 3.9 ## PROT DIDER CRITERIA
-fileOUT_PAYRE=open("./OUT/21_PAYRE-MVGDS.csv","w")
+fileOUT_PAYRE=open("./OUT/PAYRE-MVGDS.csv","w")
 fileOUT_PAYRE.write("LOCUS,")
 for taxa in LT:
     fileOUT_PAYRE.write("%s_count_PAYRE,%s_prop_PAYRE,%s_count_AC,%s_prop_AC,%s_count_MVGDS,%s_prop_MVGDS,%s_ratio_PAYRE_vs_MVGDS,%s_ratio_AC_vs_MVGDS," %(taxa,taxa,taxa,taxa,taxa,taxa,taxa,taxa))
 fileOUT_PAYRE.write("\n")
 
 ## 3.10 ## PROT Total residue weight
-fileOUT_TotalResidueWeight=open("./OUT/22_TotalResidueWeight.csv","w")
+fileOUT_TotalResidueWeight=open("./OUT/TotalResidueWeight.csv","w")
 fileOUT_TotalResidueWeight.write("LOCUS,")
 for taxa in LT:
     fileOUT_TotalResidueWeight.write("%s_Total_Residue_Weight," %taxa)
 fileOUT_TotalResidueWeight.write("\n")
 
 ## 3.11 ## PROT Total residue volume
-fileOUT_TotalResidueVolume=open("./OUT/23_TotalResidueVolume.csv","w")
+fileOUT_TotalResidueVolume=open("./OUT/TotalResidueVolume.csv","w")
 fileOUT_TotalResidueVolume.write("LOCUS,")
 for taxa in LT:
     fileOUT_TotalResidueVolume.write("%s_Total_Residue_Volume," %taxa)
 fileOUT_TotalResidueVolume.write("\n")
 
 ## 3.12 ## PROT Total partial specific volume
-fileOUT_TotalPartialSpecificVolume=open("./OUT/24_TotalPartialSpecificVolume.csv","w")
+fileOUT_TotalPartialSpecificVolume=open("./OUT/TotalPartialSpecificVolume.csv","w")
 fileOUT_TotalPartialSpecificVolume.write("LOCUS,")
 for taxa in LT:
     fileOUT_TotalPartialSpecificVolume.write("%s_Total_Partial_Specific_Volume," %taxa)
 fileOUT_TotalPartialSpecificVolume.write("\n")
 
 ## 3.13 ## PROT Total hydratation
-fileOUT_TotalHydratation=open("./OUT/25_TotalHydratation.csv","w")
+fileOUT_TotalHydratation=open("./OUT/TotalHydratation.csv","w")
 fileOUT_TotalHydratation.write("LOCUS,")
 for taxa in LT:
     fileOUT_TotalHydratation.write("%s_Total_Hydratation," %taxa)
@@ -623,3 +612,4 @@
     
     
 
+