changeset 1:86037c2b27d9 draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
author lecorguille
date Thu, 13 Apr 2017 09:47:31 -0400
parents aba551b2b79e
children 92615a423389
files CHANGELOG.md README.md scripts/S01_prepare_BlastAlign_runs.py scripts/S02_phylip2fasta.py
diffstat 4 files changed, 180 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGELOG.md	Thu Apr 13 05:47:47 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-Changelog
-
-Version 1.0 - 13/04/2017
-
-  - Add functional test with planemo
-  - Planemo test with conda dependencies for blastalign, blast-legacy, perl, python
-  - Scripts renamed + symlinks to the directory 'scripts'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Thu Apr 13 09:47:31 2017 -0400
@@ -0,0 +1,7 @@
+Changelog
+
+Version 1.0 - 13/04/2017
+
+  - Add functional test with planemo
+  - Planemo test with conda dependencies for blastalign, blast-legacy, perl, python
+  - Scripts renamed + symlinks to the directory 'scripts'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/S01_prepare_BlastAlign_runs.py	Thu Apr 13 09:47:31 2017 -0400
@@ -0,0 +1,119 @@
+#!/usr/bin/python
+
+## AUTHOR: Eric Fontanillas
+## LAST VERSION: 20/08/14 by Julie BAFFARD
+
+## DESCRIPTION: Prepare to run multialign on assemblages on several cluster nodes
+
+import os,sys
+script_path = os.path.dirname(sys.argv[0])
+###############################################
+### DEF 1 : Split a list in several sublist ###
+###############################################
+def chunks(list, n):
+    """ Yield successive n-sized chunks from l.
+    """
+    for i in xrange(0, len(list), n):
+        yield list[i:i+n]
+######################################
+
+
+##########################################
+### DEF 2 : Prepare run for blastalign ###
+##########################################
+def prepare_BLASTALIGN_runs(list_file):
+
+    ln = len(list_file)
+    i = 0
+    list_of_sublist = list(chunks(list_file, 5000))           ### DEF1 ###
+
+    list_files_failed = []
+
+    k=0
+    for sublist in list_of_sublist:
+        for fasta_file in sublist:
+            i = i + 1
+            S1 = string.split(fasta_file, ".")
+            fasta_name = S1[0]
+
+	    # filter the "N"
+	    f = open("./%s.fasta" %fasta_name, "r")
+	    nextline=f.readlines()
+	    f.close()
+
+	    j = 0
+	    while j<len(nextline) :
+		if not nextline[j].startswith(">") :
+		    nextline[j] = nextline[j].upper()
+		    nombre = nextline[j].rfind("N")
+		    if nombre != -1 :
+			nextline[j] = nextline[j].replace("N", "")
+		j+=1
+	    nextline = "".join(nextline)
+
+	    files = open("./%s.fasta" %fasta_name, "w")
+	    files.write(nextline)
+	    files.close()
+
+            ## run individual script
+	    os.system("BlastAlign -m %s -n %s -i ./%s.fasta\n" %(sys.argv[3],sys.argv[4],fasta_name))
+
+	    try:
+        	phylip_file = open("./%s.fasta.phy" %fasta_name, "r")       
+    	    except IOError:
+		list_files_failed.append(fasta_file)
+
+	    if sys.argv[2] == "oui" :
+	    	try: # in the case BlastAlign failed (there is no .fasta.phy in that case)
+	    		with open("%s.fasta.phy" %fasta_name): os.system("python %s/S02_phylip2fasta.py ./%s.fasta.phy ./%s.fasta.fasta\n" %(script_path,fasta_name, fasta_name))
+                except IOError:
+                	pass
+		
+		os.system("rm -f ./%s.fasta\n" %fasta_name)
+
+		if os.path.isfile("%s.fasta.fasta" %fasta_name): # in the case BlastAlign failed (there is no .fasta.fasta in that case)
+		    os.system("mv ./%s.fasta.fasta ./%s.fasta\n" %(fasta_name, fasta_name))		    
+
+    return(list_files_failed)
+            
+######################################
+
+###################
+### RUN RUN RUN ###
+###################
+
+import string, os, time, re, sys, zipfile, re
+
+## 1 ## INPUT/OUTPUT
+list_file = []
+zfile = zipfile.ZipFile(sys.argv[1])
+for name in zfile.namelist() :
+    list_file.append(name)
+    zfile.extract(name, "./")
+
+## 2 ## RUN
+list_files_failed = prepare_BLASTALIGN_runs(list_file)   ### DEF2 ###
+f_failed = open("./list_files_failed.txt", "w")
+f_failed.write("Number of files failed with BlastAlign : %d\n" %len(list_files_failed))
+for files in list_files_failed :
+    f_failed.write("\t%s \n" %files)
+
+
+# Conversion to zip format
+f_phylip = zipfile.ZipFile("Alignment_locus_phy.zip", "w")
+f_nexus = zipfile.ZipFile("Alignment_locus_nxs.zip", "w")
+f_fasta = zipfile.ZipFile("Alignment_locus_fasta.zip", "w")
+
+phylip = "^.*fasta.phy$"
+nexus = "^.*fasta.nxs$"
+fasta = "^.*fasta$"
+
+folder = os.listdir("./")
+
+for files in folder :
+    if re.match(phylip, files) :
+	f_phylip.write("./%s" %files)
+    if re.match(nexus, files) :
+	f_nexus.write("./%s" %files)
+    if re.match(fasta, files) :
+	f_fasta.write("./%s" %files)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/S02_phylip2fasta.py	Thu Apr 13 09:47:31 2017 -0400
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+## AUTHOR: Eric Fontanillas
+## LAST VERSION: 20/08/14 by Julie BAFFARD
+
+## DESCRIPTION: formatting a phylip alignment (the format used with PAML) into fasta format
+
+import string, os, sys
+
+if len(sys.argv) == 1:
+    print "put arguments!!"
+    print "USAGE: $T4S02_phylip2fasta.py INPUT OUTPUT"
+
+
+## INPUT
+f1 = sys.argv[1]
+F1 = open("%s" %f1, 'r')
+
+## OUTPUT
+f2 = sys.argv[2]
+F2 = open("%s" %f2, 'w')
+
+###### def1 ######
+# For each sequence line of the phylip multiple alignment, this function writes the name as a fasta header (">name") followed by the sequence
+
+def format(File_IN):
+    c = 0
+    fichier = ""
+    while 1 :
+        c = c + 1
+        next = File_IN.readline()
+        if not next :
+            break
+        
+        S1 = string.split(next, "\t")    # list : [name, sequence] --- BUG CORRECTED : "\t" instead of " "
+        fasta_name = S1[0]    # get sequence name
+        fasta_seq = S1[1][:-1]    # get sequence without the terminal '\n'
+        fichier = fichier + ">" + fasta_name + "\n" + fasta_seq + "\n"
+        
+    return (fichier,c)
+#-#-#-#-#-#-#-#-#-#-#
+
+###################
+### RUN RUN RUN ###
+###################
+
+F1.readline() ## skip the first line
+
+fichier_txt, c = format(F1)   ### DEF1 ###
+
+F2.write(fichier_txt)
+
+F1.close()
+F2.close()