diff scripts/S01b_study_seq_composition_aa.py @ 2:988467f963f0 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:57:49 -0500
parents 8de21b6eb110
children 5766f80370e7
line wrap: on
line diff
--- a/scripts/S01b_study_seq_composition_aa.py	Wed Sep 27 10:04:08 2017 -0400
+++ b/scripts/S01b_study_seq_composition_aa.py	Wed Jan 17 08:57:49 2018 -0500
@@ -2,62 +2,13 @@
 # -*- coding: ascii -*-
 ## Author: Eric FONTANILLAS
 ## Date: 21.12.10
+## Last Version : 12/2017 by Victor Mataigne
 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and two "cold" species: Pg, Pp)
-import sys,os,shutil,subprocess,string
-script_path = os.path.dirname(sys.argv[0])
-
-#############
-### DEF 0 ###
-#############
-def simplify_fasta_name(fasta_name,LT):    
-    for abbreviation in LT:
-        if abbreviation in fasta_name:
-            new_fasta_name = abbreviation
-
-    return(new_fasta_name)
-##########################################
-
-###########
-## DEF1 ##
-###########
-## Generates bash, with key = fasta name; value = sequence (WITH GAP, IF ANY, REMOVED IN THIS FUNCTION)
-
-def dico(fasta_file,LT):
 
-    count_fastaName=0
-    F1 = open(fasta_file, "r")
-    
-    bash1 = {}
-    while 1:
-        nextline = F1.readline()
-        #print nextline
-        if not nextline :
-            break
-        
-        if nextline[0] == ">":
-            count_fastaName = count_fastaName + 1
-            fasta_name = nextline[1:-1]
-            nextline = F1.readline()
-            sequence = nextline[:-1]
-            
-            if fasta_name not in bash1.keys():
-                fasta_name = simplify_fasta_name(fasta_name,LT)  ### DEF 0 ###
-                bash1[fasta_name] = sequence
-            else:
-                print fasta_name
+import sys,os,shutil,subprocess,string
+from functions import simplify_fasta_name, dico
 
-    # Find alignment length
-    kk = bash1.keys()
-    key0 = kk[0]
-    seq0 = bash1[key0]
-    ln_seq = len(seq0)
-
-    F1.close()
-    
-    return(bash1)
-#####################################
-
-
+script_path = os.path.dirname(sys.argv[0])
 
 ##################
 ###### DEF2 ######
@@ -593,6 +544,20 @@
             fileOUT_TotalResidueVolume.write("%.5f," %Total_Residue_Volume)
             fileOUT_TotalPartialSpecificVolume.write("%.5f," %(Total_Partial_Specific_Volume))
             fileOUT_TotalHydratation.write("%.5f," % Total_Hydration)
+        else:
+            fileOUT_PROT_ALL.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a"))
+            fileOUT_IVYWREL.write("%s,%s," %("n.a", "n.a"))
+            fileOUT_ERK_DNQTSH.write("%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a"))
+            fileOUT_EK_QH.write("%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a"))
+            fileOUT_FYMINK_GARP.write("%s,%s,%s,%s," %("n.a","n.a","n.a","n.a"))
+            fileOUT_AVLIMFYW.write("%s,%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a","n.a"))
+            fileOUT_STNQ.write("%s,%s," %("n.a","n.a"))
+            fileOUT_RHKDE.write("%s,%s,%s,%s,%s,%s,"%("n.a","n.a","n.a","n.a","n.a","n.a"))
+            fileOUT_PAYRE.write("%s,%s,%s,%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a","n.a","n.a","n.a"))
+            fileOUT_TotalResidueWeight.write("%s," %"n.a")
+            fileOUT_TotalResidueVolume.write("%s," %"n.a")
+            fileOUT_TotalPartialSpecificVolume.write("%s," %"n.a")
+            fileOUT_TotalHydratation.write("%s," %"n.a")
         
     ## END LINE
     fileOUT_PROT_ALL.write("\n")
@@ -609,7 +574,3 @@
     fileOUT_TotalPartialSpecificVolume.write("\n")
     fileOUT_TotalHydratation.write("\n")
 
-    
-    
-
-