diff scripts/S02b_study_seq_composition_nuc.py @ 2:988467f963f0 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author abims-sbr
date Wed, 17 Jan 2018 08:57:49 -0500
parents 8de21b6eb110
children 5766f80370e7
line wrap: on
line diff
--- a/scripts/S02b_study_seq_composition_nuc.py	Wed Sep 27 10:04:08 2017 -0400
+++ b/scripts/S02b_study_seq_composition_nuc.py	Wed Jan 17 08:57:49 2018 -0500
@@ -1,63 +1,11 @@
 #!/usr/bin/env python
 ## Author: Eric FONTANILLAS
 ## Date: 21.12.10
+## Last Version : 12/2017 by Victor Mataigne
 ## Object: Test for compositional bias in genome and proteome as marker of thermal adaptation (comparison between 2 "hot" species: Ap and Ps and one "cold" species: Pg)
 
-
 import sys,os,shutil,subprocess, string
-
-#############
-### DEF 0 ###
-
-def simplify_fasta_name(fasta_name,LT):
-    for abbreviation in LT:
-        if abbreviation in fasta_name:
-            new_fasta_name = abbreviation
-
-    return(new_fasta_name)
-##########################################
-
-###########
-## DEF1 ##
-###########
-## Generates bash, with key = fasta name; value = sequence (WITH GAP, IF ANY, REMOVED IN THIS FUNCTION)
-
-def dico(fasta_file,LT):
-
-    count_fastaName=0
-    F1 = open(fasta_file, "r")
-    
-    bash1 = {}
-    while 1:
-        nextline = F1.readline()
-        #print nextline
-        if not nextline :
-            break
-        
-        if nextline[0] == ">":
-            count_fastaName = count_fastaName + 1
-            fasta_name = nextline[1:-1]
-            nextline = F1.readline()
-            sequence = nextline[:-1]
-            
-            if fasta_name not in bash1.keys():
-                fasta_name = simplify_fasta_name(fasta_name,LT)  ### DEF 0 ###
-                bash1[fasta_name] = sequence
-            else:
-                print fasta_name
-
-    # Find alignment length
-    kk = bash1.keys()
-    key0 = kk[0]
-    seq0 = bash1[key0]
-    ln_seq = len(seq0)
-
-    F1.close()
-    
-    return(bash1)
-#####################################
-
-
+from functions import simplify_fasta_name, dico
 
 ##################
 ###### DEF2 ######
@@ -205,21 +153,28 @@
 for locus in Lloci_NUC:
     print locus
     path_locus = "%s/%s" %(Path_IN_loci_NUC, locus)
-    bash = dico(path_locus,LT)
+    bash = dico(path_locus,LT) 
 
     fileOUT_NUC.write("%s," %locus)
     fileOUT_percent_GC.write("%s," %locus)
     fileOUT_percent_purine.write("%s," %locus)
     fileOUT_Purine_Load.write("%s," %locus)
-    
-    if taxa in bash.keys():
-            seq = bash[taxa]
-            percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq)   ### DEF2 ###
-            TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ###
-            fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G))
-            fileOUT_percent_GC.write("%.5f," %percent_GC)
-            fileOUT_percent_purine.write("%.5f," %percent_purine)
-            fileOUT_Purine_Load.write("%d,%d,%d,%.5f,%.5f," %(TOTAL, DIFF_GC, DIFF_AT,PLI_GC_1000, PLI_AT_1000))
+
+    for taxa in LT:    
+      if taxa in bash.keys():        
+        seq = bash[taxa]            
+        percent_GC, percent_purine,prop_A, prop_T, prop_C, prop_G = base_composition(seq)   ### DEF2 ###
+        TOTAL, DIFF_GC, DIFF_AT,PLI_GC,PLI_AT,PLI_GC_1000,PLI_AT_1000 = purine_loading(seq) ### DEF3 ###
+        fileOUT_NUC.write("%.5f,%.5f,%.5f,%.5f," %(prop_A,prop_T,prop_C,prop_G))
+        fileOUT_percent_GC.write("%.5f," %percent_GC)
+        fileOUT_percent_purine.write("%.5f," %percent_purine)
+        fileOUT_Purine_Load.write("%d,%d,%d,%.5f,%.5f," %(TOTAL, DIFF_GC, DIFF_AT,PLI_GC_1000, PLI_AT_1000))
+      else:
+        fileOUT_NUC.write("%s,%s,%s,%s," %("n.a","n.a","n.a","n.a"))
+        fileOUT_percent_GC.write("%s," %"n.a")
+        fileOUT_percent_purine.write("%s," %"n.a")
+        fileOUT_Purine_Load.write("%s,%s,%s,%s,%s," %("n.a","n.a","n.a","n.a","n.a"))
+        
     fileOUT_NUC.write("\n")
     fileOUT_percent_GC.write("\n")
     fileOUT_percent_purine.write("\n")
@@ -228,4 +183,3 @@
 fileOUT_percent_GC.close()
 fileOUT_percent_purine.close()
 fileOUT_Purine_Load.close()
-