# HG changeset patch # User abims-sbr # Date 1492076867 14400 # Node ID aba551b2b79e09e686df36009145ecd046f4c724 planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 38545eb765e0df7fcc6b8130e8e5f87cf4481122 diff -r 000000000000 -r aba551b2b79e BlastAlign.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BlastAlign.xml Thu Apr 13 05:47:47 2017 -0400 @@ -0,0 +1,440 @@ + + + + + + Align the nucleic acid sequences + + + + macros.xml + + + + + perl + blast-legacy + blastalign + + + + + + + + ${outfile}; + #if $fasta_out.value == True : + python $__tool_directory__/scripts/S02_phylip2fasta.py out.phy out.fasta >>${outfile}; + #end if + #end if + + #if $files.type == "many" : + python $__tool_directory__/scripts/S01_prepare_BlastAlign_runs.py ${files.many_files} + #if $fasta_out.value == True : + oui + #else : + non + #end if + #if $files.options.option == "yes" : + #if $files.options.options_m.m == True : + ${files.options.options_m.proportion} + #else : + 95 + #end if + #if $files.options.options_n == True : + T + #else : + F + #end if + #elif $files.options.option == "no" : + 95 F + #end if + >${outfile}; + #end if + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + files['type'] == "one" + + + files['type'] == "one" + + + ((files['type'] == "one" and fasta_out == True)) + + + files['type'] == "many" + + + files['type'] == "many" + + + ((files['type'] == "many" and fasta_out == True)) + + + + files_failed == True + + + + + + + + + + + + + + + +============ +What it does +============ + +| This tool takes **nucleic sequences in fasta format** or **zip file containing fasta files** and returns a multiple alignement (in Nexus and Phylip formats) using BLAST+ +| +| The script in perl was written by **Robert Belshaw** and **Aris Katzourakis**. +| The script in python was written by **Eric Fontanillas**. 
+| The wrapper was written by **Julie Baffard**. + +-------- + +========== +Parameters +========== + +The choice of several parameters for the blast is possible. + +**-m [maximum proportion of gaps allowed in any one sequence in the final alignment]** + | integer (between 0 and 100) + | By default : 95%, i.e. only removes sequences with extremely short matches. + | We find 50 the most useful. + | + +**-r [name of reference sequence]** + | text + | Default is searching for best candidate. + | If entered, the sequence will be extracted, written to a separate file, and blasted against the original input file. + | + +**-x [name of comma-separated sequences to be excluded from this analysis]** + | text + | + +**-n** + | If it's checked : retain original names in output files. + | If it isn't checked : output the 15-character name abbreviations (stripped of potentially problematic characters) that are used in the tool. + | + +**-s [number of sequences to be used in initial search for reference sequence]** + | integer (between 0 and total number of sequences) + | Default is finding the reference sequence by blasting all sequences against all sequences, only randomly subsampling when it thinks the blast output file might be too large. + +.. class:: infomark + +m and n are the only parameters which can be used for the 2 options (one file and many files). + +-------- + +======= +Outputs +======= + +This tool produces the following files : + +**Alignment** + | is the output with important information. + | when the alignment fails with BlastAlign, the name of the file is written to this output. + | + +**Alignement_file_failed** + | is the output listing the files that failed during the run of BlastAlign. + | + +**Alignment_{inputfile}_phylip** + | is the output with the aligned sequences in Phylip format when you choose "one file" option. + | + +**Alignment_{inputfile}_nexus** + | is the output with the aligned sequences in Nexus format when you choose "one file" option. 
+ | + +**Alignment_{input_file}_fasta** + | is the output with the aligned sequences in Fasta format when you choose "one file" and "fasta forme" options + | + +**Alignment_locus_phylip** + | is the output with the aligned sequences in Phylip format when you choose "many files" option. + | + +**Alignment_locus_nexus** + | is the output with the aligned sequences in Nexus format when you choose "many files" option. + | + +**Alignment_locus_fasta** + | is the output with the aligned sequences in Fasta format when you choose "many files" and "fasta forme" options + +.. class:: warningmark + +The zip outputs have to be downloaded and their files extracted with file archiver software; you cannot visualize them with the "eye icon" through the interface. + +-------- + +=============== +Working Example +=============== + +------------------------------ +The input file and its options +------------------------------ + +**Input file** + +| >Pf210_1/1_1.000_920 +| CCGGTGGCCATTTTCTGCACCTCGTGGGTTATTGAGCTGAAAGTGGTTCAGCTCACTGTCTGTTAACAGCCGTGTCGGTCTGAGGGTATCACAGTTAATATAATGAATCAAGAGAAGTTGAAGCAGCTCCAGGCCCAAGTCCGCATCGGAGGAAAGGG +| CACAGCAAGAAGAAAGAAGAAGGTGATTCACAGAACAGCAACAACAGATGACAAGAAACTGCAAAGTACACTGAAGAAATTGGCAGTAAATAATATTCCGGGTATAGAAGAGGTTAACATGATAAAGGATGACGGGCAAGTAATACATTTTACCAATCCGA +| AGGTGCAGGCTTCTCTTCAGTCAAACACATTTGCCATTAATGGCCAAGCCGAAACGAAACAAATCACTGACTTGCTACCCGGTATATTAAATCAGCTGGGGGCTGAAAGTTTAACAAACTTGAAGAAGCTGGCTAAATCTGTGACTGCTGGAGTTGATTC +| TGATAACAAGCAGGATGCAGCAGATATTGATGAAGATGATGATGATGTCCCAGAACTGGTTGAAAACTTTGACGAAGCATCGAAGAATGAGGGGACGTAATTCTTCTCCCACTTTATGCCATGGTAGCATCAATCGTTTTGCTGATGATGGCGTGTTTATAC +| CTACCACCCAGTGTAGATTTGTCCAGACCTGGCTTGTTTGACATTGCTTGTTGGATTTTGCAACAATATCATGATTAGACTGCCTGGCTTTGTGGCCTAAATACTGTATTAAAGTGTCTGTAAAAGGGAAGCAATTTTTCTATTAAGAAGTTATCCACTAGCAT +| ATTGACAGTTTTGCATGTTTGATTTTGTTCCTCGTGCAGGTCAGAACACTGATTGTACAGTGGCTGATTACAGAAAAATTGTATTCAGAGTTAAATAAACACATTATTATCCAAA +| >Pp_17_1/1_1.000_930 +| 
CCGGTGGCCATTTTCTGCACCTCGTGGGTATCTTGGGTTCGATTTGTATCAGCTCCCTATGTAAAATTAAACAAACTTATAACATAGATTGCAGCTGACAATACAATGAACCAAGAAAAATTAAAACAACTCCAAGCCCAGGTGCGCATTGGAGGCAAGGG +| TACAGCAAGAAGAAAGAAGAAGGTCATTCATAGAACAGCAACAACAGATGATAAAAAACTGCAGAGTACATTAAAAAAACTAGCAGTAAATAATATTCCAGGTATAGAAGAGGTTAATATGATAAAAGATGATGGACAGGTAATACATTTTACCAATCCAAAA +| GTACAGGCTTCTCTACAGTCAAACACATTTGCTATTAATGGGCAAGCTGAGACAAAACAAATCACCGAATTGTTGCCTGGTATATTAAATCAGCTGGGAGCAGAAAGTTTAACAAATCTGAAGAAACTGGCTACATCCGTGACTGGTGGAGTTGATTCTGAT +| AACAAGCCAGAAACAGCAGAAATTGATGAAGACGATGATGATGTTCCAGATTTGGTTGAAAACTTTGACGAGGCATCCAAGAATGAAGGAACGTAATTTGTCATTGGTAGATCCTCCCATAGCCTGATTCTTGTGGCTGGCGACAGCTTGTTTATATTTTAC + +CCAGTGTAGATTTGTTCAAGAAGGTGTGCTGGCGTTGTTTGAATTTTGTAATAGTACCATGATTTAAATACCCGGTTAACGGCCTACCTGTTATGTAGAAATTGTAGAGAAAAAATTAAATCAATTTTGTATGAACTATAAGCAGCAGCTAATATATTTGCAGTTT +TACATGTTTATCTGTTCATCAGCATGGGTCAGAGAATGACCGTACTTTGCTGGTGATAGAATGCTTGTATTCAAAGTTTAATAAATGGTTGTAAGCCATTTAAAAAAAAAAAAAAA + +**Parameters** + +| option : one file. It's the same for the option "many files" except that the output files are in zip format (inside : 1 file corresponding to one output of BlastAlign) +| no option for the run Blast. So, by default it's -m 95 -n F + +---------------- +The output files +---------------- + +**BlastAlign** + +************************ BlastAlign ************************ + +| +| This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus and Phylip formats) using BLASTN +| +| Input file locus_2_sp_8.fasta has 2 sequences and is 1894 bytes +| (maximum number of sequences that will be used to search for the reference sequence is 770) +| +| +| BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 720 by aligning to sequence Pf2101/11000920 (proportion of gaps in each sequence is less than 0.95) +|  + +.. class:: infomark + +| if you choose the option "many files" +| there will be as template output that number of file in the input zip. 
+| +| + +**Alignment_{inputfile}_phylip** + +| 2 720 S +| Pf2101/1100 ccggtggccattttctgcacctcgtgggttattgagctgaaagtggttcagctcactgtctgttaacagccgtgtcggtctgagggtatcacagttaatataatgaatcaagagaagttgaagcagctccaggcccaagtccgcatcggaggaaagggcacagcaagaagaaagaagaaggtgattcacagaacagcaacaacagat + +gacaagaaactgcaaagtacactgaagaaattggcagtaaataatattccgggtatagaagaggttaacatgataaaggatgacgggcaagtaatacattttaccaatccgaaggtgcaggcttctcttcagtcaaacacatttgccattaatggccaagccgaaacgaaacaaatcactgacttgctacccggtatattaaatcagctgggggctgaaag +tttaacaaacttgaagaagctggctaaatctgtgactgctggagttgattctgataacaagcaggatgcagcagatattgatgaagatgatgatgatgtcccagaactggttgaaaactttgacgaagcatcgaagaatgaggggacgtaattcttctcccactttatgccatggtagcatcaatcgttttgctgatgatggcgtgtttatacctaccacccagtgtaga +tttgtccagacctggcttgtttgacattgcttgttggattttgcaacaatatcatgattaga + +| Pp171/11000 ccggtggccattttctgcacctcgtgggt-------------------------------------------------------------------aatacaatgaaccaagaaaaattaaaacaactccaagcccaggtgcgcattggaggcaagggtacagcaagaagaaagaagaaggtcattcatagaacagcaacaacagatgataaaaaactgcagag +| tacattaaaaaaactagcagtaaataatattccaggtatagaagaggttaatatgataaaagatgatggacaggtaatacattttaccaatccaaaagtacaggcttctctacagtcaaacacatttgctattaatgggcaagctgagacaaaacaaatcaccgaattgttgcctggtatattaaatcagctgggagcagaaagtttaacaaatctgaagaaact +| ggctacatccgtgactggtggagttgattctgataacaagccagaaacagcagaaattgatgaagacgatgatgatgttccagatttggttgaaaactttgacgaggcatccaagaatgaaggaacgtaatt-----------------------------------------------------------------acccagtgtagatttgt---------------------------------------------- +| ------------- +| + +.. class:: infomark + +| If you choose the option "many file" +| Save as *Galaxy{number}-[Alignment_locus_phy].zip* +| If you unzip the file, a number of files are extracted (depends on the number of locus) : {name_of_file}.phy +| +|  + +**Alignment_{inputfile}_nexus** + +| #NEXUS + +[Aligned to seq Pf2101/1100 by BlastAlign. 
We have excluded sequences with more than 0.95 gaps] + +BEGIN DATA; + +| dimensions ntax=2 nchar=720; +| format gap=- datatype=DNA; +| matrix + +Pf2101/1100 ccggtggccattttctgcacctcgtgggttattgagctgaaagtggttcagctcactgtctgttaacagccgtgtcggtctgagggtatcacagttaatataatgaatcaagagaagttgaagcagctccaggcccaagtccgcatcggaggaaagggcacagcaagaagaaagaagaaggtgattcacagaacagcaacaacagat +gacaagaaactgcaaagtacactgaagaaattggcagtaaataatattccgggtatagaagaggttaacatgataaaggatgacgggcaagtaatacattttaccaatccgaaggtgcaggcttctcttcagtcaaacacatttgccattaatggccaagccgaaacgaaacaaatcactgacttgctacccggtatattaaatcagctgggggctgaaag +tttaacaaacttgaagaagctggctaaatctgtgactgctggagttgattctgataacaagcaggatgcagcagatattgatgaagatgatgatgatgtcccagaactggttgaaaactttgacgaagcatcgaagaatgaggggacgtaattcttctcccactttatgccatggtagcatcaatcgttttgctgatgatggcgtgtttatacctaccacccagtgtaga +tttgtccagacctggcttgtttgacattgcttgttggattttgcaacaatatcatgattaga + +| Pp171/11000 ccggtggccattttctgcacctcgtgggt-------------------------------------------------------------------aatacaatgaaccaagaaaaattaaaacaactccaagcccaggtgcgcattggaggcaagggtacagcaagaagaaagaagaaggtcattcatagaacagcaacaacagatgataaaaaactgcagag +| tacattaaaaaaactagcagtaaataatattccaggtatagaagaggttaatatgataaaagatgatggacaggtaatacattttaccaatccaaaagtacaggcttctctacagtcaaacacatttgctattaatgggcaagctgagacaaaacaaatcaccgaattgttgcctggtatattaaatcagctgggagcagaaagtttaacaaatctgaagaaac +| tggctacatccgtgactggtggagttgattctgataacaagccagaaacagcagaaattgatgaagacgatgatgatgttccagatttggttgaaaactttgacgaggcatccaagaatgaaggaacgtaatt-----------------------------------------------------------------acccagtgtagatttgt-------------------------------------------- +| ------------- +| ; +| end; +| + +.. 
class:: infomark + +| If you choose the option "many file" +| Save as *Galaxy{number}-[Alignment_locus_nxs].zip* +| If you unzip the file, a number of files are extracted (depends on the number of locus) : {name_of_file}.nxs +| +| + +**Alignment_{inputfile}_fasta** + +| >Pf2101/11000920 + +ccggtggccattttctgcacctcgtgggttattgagctgaaagtggttcagctcactgtctgttaacagccgtgtcggtctgagggtatcacagttaatataatgaatcaagagaagttgaagcagctccaggcccaagtccgcatcggaggaaagggcacagcaagaagaaagaagaaggtgattcacagaacagcaacaacagatgacaagaaactg +caaagtacactgaagaaattggcagtaaataatattccgggtatagaagaggttaacatgataaaggatgacgggcaagtaatacattttaccaatccgaaggtgcaggcttctcttcagtcaaacacatttgccattaatggccaagccgaaacgaaacaaatcactgacttgctacccggtatattaaatcagctgggggctgaaagtttaacaaacttgaa +gaagctggctaaatctgtgactgctggagttgattctgataacaagcaggatgcagcagatattgatgaagatgatgatgatgtcccagaactggttgaaaactttgacgaagcatcgaagaatgaggggacgtaattcttctcccactttatgccatggtagcatcaatcgttttgctgatgatggcgtgtttatacctaccacccagtgtagatttgtccagacctggc +ttgtttgacattgcttgttggattttgcaacaatatcatgattaga + +| >Pp171/11000930 + +ccggtggccattttctgcacctcgtgggt-------------------------------------------------------------------aatacaatgaaccaagaaaaattaaaacaactccaagcccaggtgcgcattggaggcaagggtacagcaagaagaaagaagaaggtcattcatagaacagcaacaacagatgataaaaaactgcagagtacattaaaaaa +actagcagtaaataatattccaggtatagaagaggttaatatgataaaagatgatggacaggtaatacattttaccaatccaaaagtacaggcttctctacagtcaaacacatttgctattaatgggcaagctgagacaaaacaaatcaccgaattgttgcctggtatattaaatcagctgggagcagaaagtttaacaaatctgaagaaactggctacatccg +tgactggtggagttgattctgataacaagccagaaacagcagaaattgatgaagacgatgatgatgttccagatttggttgaaaactttgacgaggcatccaagaatgaaggaacgtaatt-----------------------------------------------------------------acccagtgtagatttgt--------------------------------------------------------- + +.. 
class:: infomark + +| If you choose the option "many files" +| Save as *Galaxy{number}-[Alignment_locus_fasta].zip* +| If you unzip the file, a number of files are extracted (depending on the number of loci) : {name_of_file}.fasta + + + + + diff -r 000000000000 -r aba551b2b79e CHANGELOG.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHANGELOG.md Thu Apr 13 05:47:47 2017 -0400 @@ -0,0 +1,7 @@ +Changelog + +Version 1.0 - 13/04/2017 + + - Add functional test with planemo + - Planemo test with conda dependencies for blastalign, blast-legacy, perl, python + - Scripts renamed + symlinks to the directory 'scripts' diff -r 000000000000 -r aba551b2b79e macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Apr 13 05:47:47 2017 -0400 @@ -0,0 +1,16 @@ + + + + python + + + + + Credits : ABIMS team, Roscoff Marine Station + Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool. + Version 1 : Scripts by Eric Fontanillas -- Galaxy integration by Julie Baffard + Version 2 : improvements by Victor Mataigne, Gildas le Corguillé, Misharl Monsoor + + + + diff -r 000000000000 -r aba551b2b79e test-data/test_05.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_05.out Thu Apr 13 05:47:47 2017 -0400 @@ -0,0 +1,151 @@ + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus1_sp2.fasta has 2 sequences and is 360 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Am31/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign 
************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus2_sp2.fasta has 2 sequences and is 360 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Am21/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus3_sp2.fasta has 2 sequences and is 625 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1341) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 240 by aligning to sequence Ac231/11000366 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus4_sp2.fasta has 2 sequences and is 360 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Pf81/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide 
sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus5_sp2.fasta has 2 sequences and is 360 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Pf91/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus6_sp2.fasta has 2 sequences and is 360 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Pf61/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus7_sp2.fasta has 2 sequences and is 360 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Pf41/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in 
Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus8_sp2.fasta has 2 sequences and is 590 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1380) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus9_sp2.fasta has 2 sequences and is 361 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1765) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Pf101/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus10_sp2.fasta has 2 sequences and is 360 bytes +(maximum number of sequences that will be used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 2 sequences and length 160 by aligning to sequence Pf51/11000160 (percentage of gaps in each sequence is less than 95%) + + +************************ BlastAlign ************************ + +This program takes nucleotide sequences in fasta format and returns a multiple alignment (in Nexus & Phylip formats) using BLASTN + + +Will exclude sequences where gaps make up more than 95% of the sequence in the final alignment + +Input file ./locus1_sp3.fasta has 3 sequences and is 540 bytes +(maximum number of sequences that will be 
used to search for the reference sequence is 1767) + + +BlastAlign finished: it has produced a multiple alignment of 3 sequences and length 160 by aligning to sequence Pf71/11000160 (percentage of gaps in each sequence is less than 95%) + diff -r 000000000000 -r aba551b2b79e test-data/test_05_output_BlastAlign.zip Binary file test-data/test_05_output_BlastAlign.zip has changed diff -r 000000000000 -r aba551b2b79e test-data/test_4_output_POGS_input_BlastAlign.zip Binary file test-data/test_4_output_POGS_input_BlastAlign.zip has changed