Mercurial > repos > abims-sbr > blastalign
changeset 1:86037c2b27d9 draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit ab76075e541dd7ece1090f6b55ca508ec0fde39d
author | lecorguille |
---|---|
date | Thu, 13 Apr 2017 09:47:31 -0400 |
parents | aba551b2b79e |
children | 92615a423389 |
files | CHANGELOG.md README.md scripts/S01_prepare_BlastAlign_runs.py scripts/S02_phylip2fasta.py |
diffstat | 4 files changed, 180 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/CHANGELOG.md Thu Apr 13 05:47:47 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -Changelog - -Version 1.0 - 13/04/2017 - - - Add functional test with planemo - - Planemo test with conda dependencies for blastalign, blast-legacy, perl, python - - Scripts renamed + symlinks to the directory 'scripts'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Thu Apr 13 09:47:31 2017 -0400 @@ -0,0 +1,7 @@ +Changelog + +Version 1.0 - 13/04/2017 + + - Add functional test with planemo + - Planemo test with conda dependencies for blastalign, blast-legacy, perl, python + - Scripts renamed + symlinks to the directory 'scripts'
#!/usr/bin/python
# scripts/S01_prepare_BlastAlign_runs.py

## AUTHOR: Eric Fontanillas
## LAST VERSION: 20/08/14 by Julie BAFFARD

## DESCRIPTION: Prepare to run multialign on assemblages on several cluster nodes
##
## USAGE: S01_prepare_BlastAlign_runs.py INPUT_ZIP FASTA_OUT M N
##   INPUT_ZIP  zip archive of fasta files; it is extracted into the current directory
##   FASTA_OUT  "oui" to convert every phylip alignment back to fasta
##   M, N       values forwarded to the -m and -n options of BlastAlign

import os
import re
import string
import subprocess
import sys
import time
import zipfile

script_path = os.path.dirname(sys.argv[0])


###############################################
### DEF 1 : Split a list in several sublist ###
###############################################
def chunks(list, n):
    """Yield successive n-sized chunks from ``list``.

    The parameter keeps its historical name (it shadows the builtin inside
    this function only) so that existing callers are unaffected.
    """
    for start in range(0, len(list), n):
        yield list[start:start + n]


def _fasta_basename(fasta_file):
    """Return the archive member name without its ``.fasta`` extension."""
    if fasta_file.endswith(".fasta"):
        return fasta_file[:-len(".fasta")]
    # Historical rule for any other name: keep what precedes the first dot.
    return fasta_file.split(".")[0]


def _strip_n(lines):
    """Upper-case every sequence line and remove its "N"; headers are kept as is."""
    return [
        line if line.startswith(">") else line.upper().replace("N", "")
        for line in lines
    ]


def _run(command):
    """Run ``command`` (an argument list, no shell) and return its exit status.

    Returns None when the program cannot be started, so that a missing
    executable is handled like any other failed run instead of aborting.
    """
    try:
        return subprocess.call(command)
    except OSError as err:
        sys.stderr.write("Cannot run %s: %s\n" % (command[0], err))
        return None


##########################################
### DEF 2 : Prepare run for blastalign ###
##########################################
def prepare_BLASTALIGN_runs(list_file, fasta_out=None, m=None, n=None):
    """Filter, align and optionally convert every fasta file of ``list_file``.

    For each name the matching ``./<name>.fasta`` file is rewritten without
    "N" in its sequence lines, then BlastAlign is run on it. When
    ``fasta_out`` is "oui" and a phylip alignment was produced, it is
    converted with S02_phylip2fasta.py and takes the place of the input file.

    Args:
        list_file: names of the fasta files, as listed in the input archive.
        fasta_out: "oui" to request the fasta conversion; defaults to sys.argv[2].
        m: value of the BlastAlign -m option; defaults to sys.argv[3].
        n: value of the BlastAlign -n option; defaults to sys.argv[4].

    Returns:
        The names for which BlastAlign produced no ``.fasta.phy`` file.
    """
    if not list_file:
        return []
    if fasta_out is None:
        fasta_out = sys.argv[2]
    if m is None:
        m = sys.argv[3]
    if n is None:
        n = sys.argv[4]

    list_files_failed = []
    for fasta_file in list_file:
        fasta_name = _fasta_basename(fasta_file)
        fasta_path = "./%s.fasta" % fasta_name
        phylip_path = "./%s.fasta.phy" % fasta_name
        converted_path = "./%s.fasta.fasta" % fasta_name

        # filter the "N"
        with open(fasta_path, "r") as handle:
            cleaned = _strip_n(handle.readlines())
        with open(fasta_path, "w") as handle:
            handle.writelines(cleaned)

        ## run individual script
        # Argument lists (no shell) keep odd file names from being
        # interpreted as shell syntax.
        _run(["BlastAlign", "-m", str(m), "-n", str(n), "-i", fasta_path])

        # BlastAlign failed when there is no .fasta.phy
        aligned = os.path.isfile(phylip_path)
        if not aligned:
            list_files_failed.append(fasta_file)

        if fasta_out == "oui" and aligned:
            converter = os.path.join(script_path, "S02_phylip2fasta.py")
            _run(["python", converter, phylip_path, converted_path])

        # NOTE(review): the flattened listing this script was recovered from
        # does not show whether the clean-up below was nested in the "oui"
        # branch; it is assumed to run for every file. Confirm against the
        # repository before merging.
        try:
            os.remove(fasta_path)
        except OSError:
            pass  # same as "rm -f": a missing file is not an error

        # in the case BlastAlign failed there is no .fasta.fasta to rename
        if os.path.isfile(converted_path):
            os.rename(converted_path, fasta_path)

    return list_files_failed


###################
### RUN RUN RUN ###
###################
def main(argv=None):
    """Extract the input archive, run the alignments and zip the results.

    Returns the process exit status (0 on success, 1 on a usage error).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        sys.stderr.write(
            "USAGE: S01_prepare_BlastAlign_runs.py INPUT_ZIP FASTA_OUT M N\n")
        return 1

    ## 1 ## INPUT/OUTPUT
    list_file = []
    with zipfile.ZipFile(argv[1]) as zfile:
        for name in zfile.namelist():
            zfile.extract(name, "./")
            if name.endswith("/"):
                continue  # directory entry: nothing to align
            list_file.append(name)

    ## 2 ## RUN
    list_files_failed = prepare_BLASTALIGN_runs(
        list_file, argv[2], argv[3], argv[4])
    with open("./list_files_failed.txt", "w") as f_failed:
        f_failed.write("Number of files failed with BlastAlign : %d\n"
                       % len(list_files_failed))
        for failed in list_files_failed:
            f_failed.write("\t%s \n" % failed)

    # Conversion in zip format
    phylip = re.compile(r"^.*fasta\.phy$")
    nexus = re.compile(r"^.*fasta\.nxs$")
    fasta = re.compile(r"^.*fasta$")

    # The context managers close the archives, which is what writes their
    # central directory; without it the zip files can be left unreadable.
    with zipfile.ZipFile("Alignment_locus_phy.zip", "w") as f_phylip, \
            zipfile.ZipFile("Alignment_locus_nxs.zip", "w") as f_nexus, \
            zipfile.ZipFile("Alignment_locus_fasta.zip", "w") as f_fasta:
        for entry in sorted(os.listdir("./")):
            if phylip.match(entry):
                f_phylip.write("./%s" % entry)
            if nexus.match(entry):
                f_nexus.write("./%s" % entry)
            if fasta.match(entry):
                f_fasta.write("./%s" % entry)
    return 0


if __name__ == "__main__":
    sys.exit(main())
#!/usr/bin/python
# scripts/S02_phylip2fasta.py

## AUTHOR: Eric Fontanillas
## LAST VERSION: 20/08/14 by Julie BAFFARD

## DESCRIPTION: convert a tab-separated phylip alignment (one "NAME<TAB>SEQUENCE"
## record per line after a header line) into fasta format
##
## USAGE: S02_phylip2fasta.py INPUT OUTPUT

import string
import os
import sys


###### def1 ######
# Turn every "NAME<TAB>SEQUENCE" record of the alignment into a fasta entry
def format(File_IN):
    """Build the fasta text for the records remaining in ``File_IN``.

    Args:
        File_IN: open text file positioned after the phylip header line; each
            following line holds a sequence name, a tab and the sequence.

    Returns:
        A ``(fasta_text, c)`` tuple where ``c`` is the number of ``readline``
        calls made, i.e. the number of lines read plus one for the final
        end-of-file read (kept as in the historical implementation).

    Raises:
        ValueError: if a non-blank line contains no tab separator.
    """
    c = 0
    records = []
    while True:
        c += 1
        line = File_IN.readline()
        if not line:
            break

        # Strip only the line terminator, so a last line without a trailing
        # newline keeps its final base.
        line = line.rstrip("\r\n")
        if not line.strip():
            continue  # blank line: nothing to convert

        fields = line.split("\t")    # list : [name, sequence]
        if len(fields) < 2:
            raise ValueError(
                "record line %d: expected NAME<TAB>SEQUENCE, got %r" % (c, line))
        records.append(">" + fields[0] + "\n" + fields[1] + "\n")

    return ("".join(records), c)
#-#-#-#-#-#-#-#-#-#-#


###################
### RUN RUN RUN ###
###################
def main(argv=None):
    """Convert the phylip file ``argv[1]`` into the fasta file ``argv[2]``.

    Returns the process exit status (0 on success, 1 on a usage error).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print("put arguments!!")
        print("USAGE: $T4S02_phylip2fasta.py INPUT OUTPUT")
        return 1

    with open(argv[1], "r") as phylip_in:
        phylip_in.readline()    ## jump the first line
        fichier_txt, _count = format(phylip_in)    ### DEF1 ###

    with open(argv[2], "w") as fasta_out:
        fasta_out.write(fichier_txt)
    return 0


if __name__ == "__main__":
    sys.exit(main())