Mercurial > repos > abims-sbr > cds_search

--- a/scripts/S01_find_orf_on_multiple_alignment.py	Wed Feb 28 06:05:15 2018 -0500
+++ b/scripts/S01_find_orf_on_multiple_alignment.py	Wed Feb 28 10:38:40 2018 -0500
@@ -17,20 +17,18 @@

 def code_universel(F1):
+    """Load the universal-code table stored in the file at path F1.
+
+    Lines are split on single spaces: the first field is the key and
+    the third field the value. Returns the resulting dict.
+    """
     bash_codeUniversel = {}
-    while 1:
-        next = F1.readline()
-        if not next: break
-        L1 = string.split(next, " ")
-        length1 = len(L1)
-        if length1 == 3:
-            key = L1[0]
-            value = L1[2][:-1]
-            bash_codeUniversel[key] = value
-        else:
-            key =  L1[0]
-            value = L1[2]
-            bash_codeUniversel[key] = value
-    F1.close()
+    with open(F1, "r") as code_file:
+        for line in code_file:
+            fields = line.rstrip("\n").split(" ")
+            # Skip blank/short lines: they have no third field to read.
+            if len(fields) < 3:
+                continue
+            bash_codeUniversel[fields[0]] = fields[2]
     return(bash_codeUniversel)
 ###########################################################

@@ -329,9 +327,7 @@
 list_file = str.split(infiles,",")

 ### Get Universal Code
-F2 = open(sys.argv[2], 'r')
-bash_codeUniversel = code_universel(F2)  ### DEF2 ###
-F2.close()
+bash_codeUniversel = code_universel(sys.argv[2])  ### DEF2 ###

 os.mkdir("04_BEST_ORF_nuc")
 Path_OUT1 = "04_BEST_ORF_nuc"
@@ -359,10 +355,8 @@

 for file in list_file:
     count_file_processed = count_file_processed + 1
-    fasta_file_path = "./%s" %file
-    fasta_file = open(fasta_file_path, "r")
-    bash_fasta = dico(fasta_file)   ### DEF 1 ###
-    fasta_file.close()
+    fasta_file_path = "./%s" %file
+    bash_fasta = dico(fasta_file_path)   ### DEF 1 ###
     BESTORF_nuc, BESTORF_nuc_CODING, BESTORF_nuc_CDS_with_M, BESTORF_aa, BESTORF_aa_CODING, BESTORF_aa_CDS_with_M  = find_good_ORF_criteria_3(bash_fasta, bash_codeUniversel)   ### DEF 4 - PART 2 - ###

     ## a ## OUTPUT BESTORF_nuc
--- a/scripts/S02_remove_too_short_bit_or_whole_sequence.py	Wed Feb 28 06:05:15 2018 -0500
+++ b/scripts/S02_remove_too_short_bit_or_whole_sequence.py	Wed Feb 28 10:38:40 2018 -0500
@@ -78,8 +78,8 @@
 path_OUT2 = "07_CDS_nuc"

 for file in L_IN1:
-    file_INaa = open("%s/%s" %(path_IN1, file), "r")
-    file_INnuc = open("%s/%s" %(path_IN2, file), "r")
+    file_INaa = "%s/%s" %(path_IN1, file)
+    file_INnuc = "%s/%s" %(path_IN2, file)

     dico_aa = dico(file_INaa)   ### DEF 0 ###
     dico_nuc = dico(file_INnuc)   ### DEF 0 ###
@@ -174,9 +174,6 @@
 	if nb not in list_sp and nb != "sp0" :
 	    list_sp.append(nb)

-    file_INaa.close()
-    file_INnuc.close()
-
 # [FILTER 5]: check if the number of locus with the max number of species isn't 0
 #if it is : MIN_SPECIES_NB - 1
 if len(list_sp) < MIN_SPECIES_NB :
--- a/scripts/S03_remove_site_with_not_enough_species_represented.py	Wed Feb 28 06:05:15 2018 -0500
+++ b/scripts/S03_remove_site_with_not_enough_species_represented.py	Wed Feb 28 10:38:40 2018 -0500
@@ -123,14 +123,11 @@


 for file in L_IN1:
-    file_INaa = open("%s/%s" %(path_IN1, file), "r")
-    file_INnuc = open("%s/%s" %(path_IN2, file), "r")
+    file_INaa = "%s/%s" %(path_IN1, file)
+    file_INnuc = "%s/%s" %(path_IN2, file)

     dico_aa = dico(file_INaa)   ### DEF 1 ###
     dico_nuc = dico(file_INnuc)   ### DEF 1 ###
-
-    file_INaa.close()
-    file_INnuc.close()

     if len(dico_aa) < MIN_SPECIES_NB :
 	list_file.append(file)
@@ -140,8 +137,8 @@


 for file in L_IN1 :
-    file_INaa = open("%s/%s" %(path_IN1, file), "r")
-    file_INnuc = open("%s/%s" %(path_IN2, file), "r")
+    file_INaa = "%s/%s" %(path_IN1, file)
+    file_INnuc = "%s/%s" %(path_IN2, file)

     dico_aa = dico(file_INaa)   ### DEF 1 ###
     dico_nuc = dico(file_INnuc)   ### DEF 1 ###
@@ -156,10 +153,6 @@
         seq0 = filtered_bash_nuc[k0]
         new_leng_nuc = len(seq0)

-    ## 4.2 ## Close INPUT
-    file_INaa.close()
-    file_INnuc.close()
-
     ## 4.3 ## Change file name for output, depending the number of species remaining in the alignment
     LS = string.split(file, "_NEW")
     LS = "".join(LS)
--- a/scripts/dico.py	Wed Feb 28 06:05:15 2018 -0500
+++ b/scripts/dico.py	Wed Feb 28 10:38:40 2018 -0500
@@ -1,16 +1,13 @@
-import string
+import string, itertools

-def dico(F1):
+def dico(F1):
     dicoco = {}
-    while 1:
-        next2 = F1.readline()
-        if not next2:
-            break
-        if next2[0] == ">":
-            fasta_name_query = next2[:-1]
-            Sn = string.split(fasta_name_query, "||")
-            fasta_name_query = Sn[0]
-            next3 = F1.readline()
-            fasta_seq_query = next3[:-1]
-            dicoco[fasta_name_query]=fasta_seq_query
+    with open(F1, "r") as file:
+        for name, query in itertools.izip_longest(*[file]*2):
+            if name[0] == ">":
+                fasta_name_query = name[:-1]
+                Sn = string.split(fasta_name_query, "||")
+                fasta_name_query = Sn[0]
+                fasta_seq_query = query[:-1]
+                dicoco[fasta_name_query] = fasta_seq_query
     return(dicoco)