Mercurial > repos > abims-sbr > blastalign
changeset 3:49017ea906b5 draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit cf1b9c905931ca2ca25faa4844d45c908756472f
author | abims-sbr |
---|---|
date | Wed, 17 Jan 2018 08:55:00 -0500 |
parents | 92615a423389 |
children | 9eb5bb56bf41 |
files | BlastAlign.xml scripts/S01_phylip2fasta.py test-data/outputs/locus10_sp2.failed.txt test-data/outputs/locus1_sp2.failed.txt test-data/outputs/locus1_sp3.failed.txt test-data/outputs/locus2_sp2.failed.txt test-data/outputs/locus3_sp2.failed.txt test-data/outputs/locus4_sp2.failed.txt test-data/outputs/locus5_sp2.failed.txt test-data/outputs/locus6_sp2.failed.txt test-data/outputs/locus7_sp2.failed.txt test-data/outputs/locus8_sp2.fasta test-data/outputs/locus8_sp2.nxs test-data/outputs/locus8_sp2.phy test-data/outputs/locus9_sp2.failed.txt |
diffstat | 2 files changed, 89 insertions(+), 129 deletions(-) [+] |
line wrap: on
line diff
--- a/BlastAlign.xml Wed Sep 27 10:02:43 2017 -0400 +++ b/BlastAlign.xml Wed Jan 17 08:55:00 2018 -0500 @@ -34,9 +34,9 @@ #end if && ln -s '$input.element_identifier'".fasta.phy" out.phy && - ln -s '$input.element_identifier'".fasta.nxs" out.nxs && + ln -s '$input.element_identifier'".fasta.nxs" out.nxs #if $fasta_out.value == True - python $__tool_directory__/scripts/S01_phylip2fasta.py out.phy out.fasta + && python $__tool_directory__/scripts/S01_phylip2fasta.py out.phy out.fasta #end if ]]></command> @@ -55,9 +55,9 @@ </inputs> <outputs> - <data format="phy" name="phy" from_work_dir="out.phy" label="Alignment of ${input.name} in phylip" /> - <data format="nxs" name="nxs" from_work_dir="out.nxs" label="Alignment of ${input.name} in nexus" /> - <data format="fasta" name="fasta" from_work_dir="out.fasta" label="Alignment of ${input.name} in fasta"> + <data format="phylip" name="phy" from_work_dir="out.phy" label="Alignment of ${input.name} in phylip" hidden="True"/> + <data format="nexus" name="nxs" from_work_dir="out.nxs" label="Alignment of ${input.name} in nexus" hidden="True"/> + <data format="fasta" name="fasta" from_work_dir="out.fasta" label="Alignment of ${input.name} in fasta" hidden="True"> <filter>fasta_out == True</filter> </data> </outputs> @@ -91,88 +91,94 @@ <output name="nxs" value="outputs/locus1_sp3.nxs" /> <output name="fasta" value="outputs/locus1_sp3.fasta" /> </test> + <test> + <param name="input" ftype="fasta" value="inputs/locus3_sp2.fasta" /> + <section name="advanced_option"> + <param name="m" value="95" /> + <param name="r" value="" /> + <param name="x" value="" /> + <param name="n" value="False" /> + <param name="s" value="0" /> + </section> + <param name="fasta_out" value="False" /> + <output name="phy" value="outputs/locus3_sp2.phy" /> + <output name="nxs" value="outputs/locus3_sp2.nxs" /> + </test> + <test> + <param name="input" ftype="fasta" value="inputs/locus8_sp2.fasta" /> + <section name="advanced_option"> + <param name="m" 
value="95" /> + <param name="r" value="" /> + <param name="x" value="" /> + <param name="n" value="False" /> + <param name="s" value="0" /> + </section> + <param name="fasta_out" value="True" /> + <output name="phy" value="outputs/locus8_sp2.phy" /> + <output name="nxs" value="outputs/locus8_sp2.nxs" /> + <output name="fasta" value="outputs/locus8_sp2.fasta" /> + </test> <!--locus10_sp2.fasta locus1_sp3.fasta locus2_sp2.fasta locus3_sp2.fasta locus4_sp2.fasta locus5_sp2.fasta locus6_sp2.fasta locus7_sp2.fasta locus8_sp2.fasta locus9_sp2.fasta--> </tests> <help> + <![CDATA[ + +. class:: infomark + +**Authors** BlastAlign has been written by Robert Belshaw and Aris Katzourakis. The scripts of this tool have been written by Eric Fontanillas. .. class:: infomark -**Authors** The script in perl was written by **Robert Belshaw** and **Aris Katzourakis**. - -@HELP_AUTHORS@ +**Galaxy integration** Julie Baffard and ABiMS Team. -============ -What it does -============ - -| This tool takes **nucleic sequences in fasta format** or **'dataset collection list' containing fasta files** and returns a multiple alignement (in Nexus and Phylip formats) using BLAST+ -| +Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool. -------- -========== -Parameters -========== +**Description** + +BlastAlign takes a set of nucleic sequences in a file in fasta format and returns a multiple alignment (in Nexus and Phylip formats) using BLAST+. This Galaxy implementation works with dataset collections, which allows multiple parallels runs of BlastAlign at once on many files. + +-------- + +**Parameters** The choice of several parameters for the blast is possible. -**-m [maximum proportion of gaps allowed in any one sequence in the final alignement]** - | integer (between 0 and 100) - | By default : 95%, i.e. only removes sequences with extremely short matches. - | We find 50 the most useful. 
- | - -**-r [name of reference sequence]** - | text - | Default is searching for best candidate. - | If entered, the sequence will be extracted, written to a separate file, and blasted against the original input file. - | +**-m : Proportion of gaps allowed in any one sequence in the final alignment** + integer (between 0 and 100). + By default : 95%, i.e. only removes sequences with extremely short matches. + We find 50 the most useful. -**-x [name of comma-separated sequences to be excluded from this analysis]** - | text - | +**-r : Name of reference sequence** + text. + Default is searching for best candidate. + If entered, the sequence will be extracted, written to a separate file, and blasted against the original input file. + + **-x : Name of sequences to be excluded from the analysis** + text. + names must be comma-separated. -**-n** - | If it's checked : retain original names in output files. - | If isn't checked : to output the 15 character name abbreviations (stripped of potentially problematic characters) that is used in the tool. - | + **-n** + If checked : retain original names in output files. + If not checked : output the 15 character name abbreviations (stripped of potentially problematic characters) that are used in the tool. + Default : checked. -**-s [number of sequences to be used in initial search for reference sequence]** - | integer (between 0 and total number of sequences) - | Default is finding the reference sequence by blasting all sequences against all sequences, only randomly subsampling when it thinks the blast output file might be too large. +**-s : Number of sequences to be used in initial search for reference sequence** + integer (between 0 and total number of sequences). + Default : 0 + Default is finding the reference sequence by blasting all sequences against all sequences, only randomly subsampling when it thinks the blast output file might be too large. 
-------- -======= -Outputs +<<<<<<< HEAD +**Outputs** + + - 'Alignment_{input.name}_phylip' : + the aligned sequences in Phylip format. ======= - -This tool, produces the following files : - -**Alignment** - | is the output with important informations. - | when the alignment failed with BlastAlign, the name of the file is writting down this output. - | - -**Alignement_file_failed** - | is the output containing the files failed during the run of BlastAlign. - | - -**Alignment_{inputfile}_phylip** - | is the output with the aligned sequences in Phylip format. - | - -**Alignment_{inputfile}_nexus** - | is the output with the aligned sequences in Nexus format. - | - -**Alignment_{input_file}_fasta** - | is the output with the aligned sequences in Fasta format. - --------- - =============== Working Example =============== @@ -215,67 +221,16 @@ | | | BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 720 by aligning to sequence Pf2101/11000920 (proportion of gaps in each sequence is less than 0.95) -| - -**Alignment_{inputfile}_phylip** - -| 2 720 S -| Pf2101/1100 ccggtggccattttctgcacctcgtgggttattgagctgaaagtggttcagctcactgtctgttaacagccgtgtcggtctgagggtatcacagttaatataatgaatcaagagaagttgaagcagctccaggcccaagtccgcatcggaggaaagggcacagcaagaagaaagaagaaggtgattcacagaacagcaacaacagat - -gacaagaaactgcaaagtacactgaagaaattggcagtaaataatattccgggtatagaagaggttaacatgataaaggatgacgggcaagtaatacattttaccaatccgaaggtgcaggcttctcttcagtcaaacacatttgccattaatggccaagccgaaacgaaacaaatcactgacttgctacccggtatattaaatcagctgggggctgaaag -tttaacaaacttgaagaagctggctaaatctgtgactgctggagttgattctgataacaagcaggatgcagcagatattgatgaagatgatgatgatgtcccagaactggttgaaaactttgacgaagcatcgaagaatgaggggacgtaattcttctcccactttatgccatggtagcatcaatcgttttgctgatgatggcgtgtttatacctaccacccagtgtaga -tttgtccagacctggcttgtttgacattgcttgttggattttgcaacaatatcatgattaga - -| Pp171/11000 
ccggtggccattttctgcacctcgtgggt-------------------------------------------------------------------aatacaatgaaccaagaaaaattaaaacaactccaagcccaggtgcgcattggaggcaagggtacagcaagaagaaagaagaaggtcattcatagaacagcaacaacagatgataaaaaactgcagag -| tacattaaaaaaactagcagtaaataatattccaggtatagaagaggttaatatgataaaagatgatggacaggtaatacattttaccaatccaaaagtacaggcttctctacagtcaaacacatttgctattaatgggcaagctgagacaaaacaaatcaccgaattgttgcctggtatattaaatcagctgggagcagaaagtttaacaaatctgaagaaact -| ggctacatccgtgactggtggagttgattctgataacaagccagaaacagcagaaattgatgaagacgatgatgatgttccagatttggttgaaaactttgacgaggcatccaagaatgaaggaacgtaatt-----------------------------------------------------------------acccagtgtagatttgt---------------------------------------------- -| ------------- -| - - -**Alignment_{inputfile}_nexus** - -| #NEXUS - -[Aligned to seq Pf2101/1100 by BlastAlign. We have excluded sequences with more than 0.95 gaps] - -BEGIN DATA; - -| dimensions ntax=2 nchar=720; -| format gap=- datatype=DNA; -| matrix +| +>>>>>>> blastalign bug fix but with no file renaming -Pf2101/1100 ccggtggccattttctgcacctcgtgggttattgagctgaaagtggttcagctcactgtctgttaacagccgtgtcggtctgagggtatcacagttaatataatgaatcaagagaagttgaagcagctccaggcccaagtccgcatcggaggaaagggcacagcaagaagaaagaagaaggtgattcacagaacagcaacaacagat -gacaagaaactgcaaagtacactgaagaaattggcagtaaataatattccgggtatagaagaggttaacatgataaaggatgacgggcaagtaatacattttaccaatccgaaggtgcaggcttctcttcagtcaaacacatttgccattaatggccaagccgaaacgaaacaaatcactgacttgctacccggtatattaaatcagctgggggctgaaag -tttaacaaacttgaagaagctggctaaatctgtgactgctggagttgattctgataacaagcaggatgcagcagatattgatgaagatgatgatgatgtcccagaactggttgaaaactttgacgaagcatcgaagaatgaggggacgtaattcttctcccactttatgccatggtagcatcaatcgttttgctgatgatggcgtgtttatacctaccacccagtgtaga -tttgtccagacctggcttgtttgacattgcttgttggattttgcaacaatatcatgattaga - -| Pp171/11000 
ccggtggccattttctgcacctcgtgggt-------------------------------------------------------------------aatacaatgaaccaagaaaaattaaaacaactccaagcccaggtgcgcattggaggcaagggtacagcaagaagaaagaagaaggtcattcatagaacagcaacaacagatgataaaaaactgcagag -| tacattaaaaaaactagcagtaaataatattccaggtatagaagaggttaatatgataaaagatgatggacaggtaatacattttaccaatccaaaagtacaggcttctctacagtcaaacacatttgctattaatgggcaagctgagacaaaacaaatcaccgaattgttgcctggtatattaaatcagctgggagcagaaagtttaacaaatctgaagaaac -| tggctacatccgtgactggtggagttgattctgataacaagccagaaacagcagaaattgatgaagacgatgatgatgttccagatttggttgaaaactttgacgaggcatccaagaatgaaggaacgtaatt-----------------------------------------------------------------acccagtgtagatttgt-------------------------------------------- -| ------------- -| ; -| end; -| - - -**Alignment_{inputfile}_fasta** + - 'Alignment_{input.name}_nexus' : + the aligned sequences in Nexus format. -| >Pf2101/11000920 - -ccggtggccattttctgcacctcgtgggttattgagctgaaagtggttcagctcactgtctgttaacagccgtgtcggtctgagggtatcacagttaatataatgaatcaagagaagttgaagcagctccaggcccaagtccgcatcggaggaaagggcacagcaagaagaaagaagaaggtgattcacagaacagcaacaacagatgacaagaaactg -caaagtacactgaagaaattggcagtaaataatattccgggtatagaagaggttaacatgataaaggatgacgggcaagtaatacattttaccaatccgaaggtgcaggcttctcttcagtcaaacacatttgccattaatggccaagccgaaacgaaacaaatcactgacttgctacccggtatattaaatcagctgggggctgaaagtttaacaaacttgaa -gaagctggctaaatctgtgactgctggagttgattctgataacaagcaggatgcagcagatattgatgaagatgatgatgatgtcccagaactggttgaaaactttgacgaagcatcgaagaatgaggggacgtaattcttctcccactttatgccatggtagcatcaatcgttttgctgatgatggcgtgtttatacctaccacccagtgtagatttgtccagacctggc -ttgtttgacattgcttgttggattttgcaacaatatcatgattaga + - 'Alignment_{input_file}_fasta' : + the aligned sequences in Fasta format if the option "fasta format" is checked. 
-| >Pp171/11000930 - -ccggtggccattttctgcacctcgtgggt-------------------------------------------------------------------aatacaatgaaccaagaaaaattaaaacaactccaagcccaggtgcgcattggaggcaagggtacagcaagaagaaagaagaaggtcattcatagaacagcaacaacagatgataaaaaactgcagagtacattaaaaaa -actagcagtaaataatattccaggtatagaagaggttaatatgataaaagatgatggacaggtaatacattttaccaatccaaaagtacaggcttctctacagtcaaacacatttgctattaatgggcaagctgagacaaaacaaatcaccgaattgttgcctggtatattaaatcagctgggagcagaaagtttaacaaatctgaagaaactggctacatccg -tgactggtggagttgattctgataacaagccagaaacagcagaaattgatgaagacgatgatgatgttccagatttggttgaaaactttgacgaggcatccaagaatgaaggaacgtaatt-----------------------------------------------------------------acccagtgtagatttgt--------------------------------------------------------- - - ---------------------------------------------------- +--------- Changelog --------- @@ -284,16 +239,16 @@ - NEW: BlastAlign will now be launched on one file at once. Although, it will manage a Dataset Collection to deal with numerous files. - **Version 1.0 - 13/04/2017** - TEST: Add funtional test with planemo - - IMPROVEMENT: Use conda dependencies for blastalign, blast-legacy, perl, python + ]]> </help> <expand macro="citations" /> </tool> +
--- a/scripts/S01_phylip2fasta.py Wed Sep 27 10:02:43 2017 -0400 +++ b/scripts/S01_phylip2fasta.py Wed Jan 17 08:55:00 2018 -0500 @@ -6,14 +6,19 @@ ## DESCRIPTION: formatting a fasta format into phylip format for using with PAML import string, os, sys - +""" if len(sys.argv) == 1: print "put arguments!!" - print "USAGE: $T4S02_phylip2fasta.py INPUT OUTPUT" - + print "USAGE: S01_phylip2fasta.py INPUT OUTPUT" +""" ## INPUT -f1 = sys.argv[1] +if os.path.isfile(sys.argv[1]) : + f1 = sys.argv[1] +else: + print "No existing phylip file ; exiting ..." + exit() + F1 = open("%s" %f1, 'r') ## OUTPUT