Mercurial > repos > abims-sbr > pairwise
diff pairwise.xml @ 9:1e0c547d88fe draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
| author | abims-sbr |
|---|---|
| date | Tue, 03 Jul 2018 10:52:31 -0400 |
| parents | f1ee838a8966 |
| children | ae65822c1fb7 |
line wrap: on
line diff
--- a/pairwise.xml Wed Feb 28 10:37:14 2018 -0500 +++ b/pairwise.xml Tue Jul 03 10:52:31 2018 -0400 @@ -1,109 +1,179 @@ -<tool name="Pairwise" id="pairwise" version="2.0.2"> +<tool name="Pairwise" id="pairwise" version="2.1.1"> - <description> - Run reciproque tblastx pairwise - </description> + <description> + Find homologous couples by blast with RBH + </description> - <macros> - <import>macros.xml</import> - </macros> + <macros> + <import>macros.xml</import> + </macros> - <requirements> - <expand macro="python_required" /> - <requirement type="package" version="2.2.22">blast-legacy</requirement> - <requirement type="package" version="1.3.1">samtools</requirement> - </requirements> + <requirements> + <expand macro="python_required" /> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="0.9.22">diamond</requirement> + <requirement type="package" version="2.2.22">blast-legacy</requirement> + </requirements> - <command><![CDATA[ + <command><![CDATA[ #set $infiles = "" #for $input in $inputs ln -s '$input' '$input.element_identifier'; #set $infiles = $infiles + $input.element_identifier + "," #end for #set $infiles = $infiles[:-1] + + ln -s $__tool_directory__/scripts/S02_04_keep_one_hit_from_blast.py . && + ln -s $__tool_directory__/scripts/S03_run_second_blast.py . && + ln -s $__tool_directory__/scripts/S05_find_rbh.py . && - ln -s $__tool_directory__/scripts/functions.py . && - ln -s $__tool_directory__/scripts/S02_xxx_patron_pipeline.sh . && - ln -s $__tool_directory__/scripts/S03_run_blast_with_k_filter.sh . && - ln -s $__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh . && - ln -s $__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py . && - ln -s $__tool_directory__/scripts/S06_post_processing_of_pairwise.py . && - ln -s $__tool_directory__/scripts/S07_format_match_get_back_nucleotides.py . && - ln -s $__tool_directory__/scripts/S08_script_extract_match_v20_blastx.py . 
&& - ln -s $__tool_directory__/scripts/S09_post_processing_of_pairwise.py . && - ln -s $__tool_directory__/scripts/S10_compare_list_pairs_for_reciprocal_best_hits_test.py . && - ln -s $__tool_directory__/scripts/S11_post_processing_of_pairwise.py . && - ln -s $__tool_directory__/scripts/S12_prot2dna.py . && + python -W ignore $__tool_directory__/scripts/S01_run_first_blast.py $infiles ${e_value} $method + > ${output}; + ]]></command> - python $__tool_directory__/scripts/S01_organize_rbh.py $infiles ${e_value} \${GALAXY_SLOTS:-1} - > ${output}; - ]]></command> + <inputs> + <param name="inputs" type="data" format="fasta" multiple="true" label="Input fasta files" /> + <param name="method" type="select" label="Alignment tool to use" help="tblastx is slow and sensitive, diamond is a lot quicker and less sensitive"> + <option value="tblastx">tblastx</option> + <option value="diamond">Diamond</option> + </param> + <param name="e_value" type="float" value="1e-5" label="e_value" help="e-value for blast." /> + </inputs> + + <outputs> + <data format="txt" name="output" label="Pairwise" /> + <collection name="output_fasta_dna" type="list" label="Pairwise_DNA"> + <discover_datasets pattern="__name_and_ext__" directory="outputs_RBH_dna" /> + </collection> + </outputs> - <inputs> - <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" /> - <param name="e_value" type="float" value="1e-5" label="e_value" help="By default, it's 1e-5. 
you can write the e-value like this: 1e-5" /> - </inputs> - - <outputs> - <data format="txt" name="output" label="Pairwise" /> - <collection name="output_fasta_dna" type="list" label="Pairwise DNA"> - <discover_datasets pattern="__name_and_ext__" directory="outputs_dna" /> - </collection> - <collection name="output_fasta_prot" type="list" label="Pairwise PROT"> - <discover_datasets pattern="__name_and_ext__" directory="outputs_prot" /> - </collection> - </outputs> - - <tests> + <tests> <test> - <param name="inputs" ftype="fasta" value="inputs2/PfPfiji_trinity.fasta,inputs2/ApApomp_trinity.fasta,inputs2/AmAmphi_trinity.fasta,inputs2/AcAcaud_trinity.fasta" /> + <param name="inputs" ftype="fasta" value="inputs_tblastx/AcAcaud_trinity.fasta,inputs_tblastx/AmAmphi_trinity.fasta,inputs_tblastx/ApApomp_trinity.fasta,inputs_tblastx/PfPfiji_trinity.fasta" /> <param name="e-value" value="1e-5" /> + <param name="method" value="tblastx" /> <output_collection name="output_fasta_dna" type="list"> - <element name="DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud.fasta" /> - <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud.fasta" /> - <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi.fasta" /> - <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp.fasta" /> + <element name="RBH_AcAcaud_AmAmphi_dna"> + <assert_contents> + <has_text text=">Ac5_1/1_1.000_160"/> + <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/> + <has_text text=">Am1_1/1_1.000_160"/> + <has_text text=">Ac7_1/1_1.000_160"/> + <has_text 
text="GCACCTAGAATTACCCGAAGTTGCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGCAGGAAGAAGG"/> + <has_text text=">Am3_1/1_1.000_160"/> + <has_text text=">Ac6_1/1_1.000_160"/> + <has_text text="CAGCCTACCACTGAGAAGAGATACTTCAACATGTCTTACTGGGGTAGAAGTGGTGGTCGTACAGCGGGTGGTA"/> + <has_text text=">Am2_1/1_1.000_160"/> + </assert_contents> + </element> + <element name="RBH_AcAcaud_PfPfiji_dna"> + <assert_contents> + <has_text text=">Ac8_1/1_1.000_160"/> + <has_text text="ATCAAAGAAGAGCAACATCGAGCTACTGGCACTGGCAATGGAATCCTAATTATAGCAGAAACAAGCACTGGTTG"/> + <has_text text=">Pf8_1/1_1.000_160"/> + <has_text text=">Ac5_1/1_1.000_160"/> + <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCAC"/> + <has_text text=">Pf7_1/1_1.000_160"/> + </assert_contents> + </element> + <element name="RBH_ApApomp_PfPfiji_dna"> + <assert_contents> + <has_text text=">Ap2_1/1_1.000_160"/> + <has_text text="ATACTCAGGCACACAGCATTTGTCGTACTAGGCGAGAGAGAGAGAGGAACGACTAATTGCAACCACGATTA"/> + <has_text text=">Pf5_1/1_1.000_160"/> + <has_text text=">Ap1_1/1_1.000_160"/> + <has_text text="GGTCGCCTTATAAAAACCAATCCGAAACAGTTTTCCTTTGAAACGTGCCAAAAACCAAAAACATACTTCAA"/> + <has_text text=">Pf4_1/1_1.000_160"/> + <has_text text=">Ap4_1/1_1.000_135"/> + <has_text text="CGGCCGCGGCGCGTCGTTCTCAGCCAAGCTGACTTCGACTTGAGCCGTCCATTCGCTTATTTACACGACGA"/> + <has_text text=">Pf10_1/1_1.000_160"/> + <has_text text=">Ap3_1/1_1.000_160"/> + <has_text text="GCCATGCAGTACACTGGACTTCTGTTATTCTGTTTGTTTGCCTTGACGGCAGCCAAACCCGCGGAAGACCT"/> + <has_text text=">Pf6_1/1_1.000_160"/> + </assert_contents> + </element> + <element name="RBH_AmAmphi_PfPfiji_dna"> + <assert_contents> + <has_text text=">Am8_1/1_1.000_160"/> + <has_text text="GTATTAATAAAAGGACAAGACTATTATTTAATACCAAGAAATCTGGCCTTAATAAGCATGGTTGCTTATATCAT"/> + <has_text text=">Pf9_1/1_1.000_160"/> + <has_text text=">Am1_1/1_1.000_160"/> + <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/> + <has_text text=">Pf7_1/1_1.000_160"/> + 
</assert_contents> + </element> </output_collection> </test> - <test> - <param name="inputs" ftype="fasta" value="inputs/PfPfiji_Trinity.fasta,inputs/ApApomp_Trinity.fasta,inputs/AmAmphi_Trinity.fasta,inputs/AcAcaud_Trinity.fasta" /> - <param name="e-value" value="1e-5" /> - <output name="output" > - <assert_contents> - <has_line line="('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')"/> - <has_line line="('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/> - <has_line line="('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> - <has_line line="('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/> - <has_line line="('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> - <has_line line="('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/> - <has_text text="Number of pairwises parsed = 2" /> - <has_text text="Number of pairwises parsed = 3" /> - <has_text text="Number of pairwises parsed = 0" /> - <has_text text="Number of pairwises parsed = 5" /> - <has_text text="Number of pairwises parsed = 1" /> - </assert_contents> - </output> - <output_collection name="output_fasta_prot" type="list"> - <element name="ReciprocalBestHits_AmAmphi_AcAcaud" value="outputs_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta" /> - <element name="ReciprocalBestHits_ApApomp_AcAcaud" value="outputs_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta" /> - <element name="ReciprocalBestHits_ApApomp_AmAmphi" value="outputs_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta" /> - <element name="ReciprocalBestHits_PfPfiji_AcAcaud" value="outputs_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta" /> - <element name="ReciprocalBestHits_PfPfiji_AmAmphi" value="outputs_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta" /> - <element name="ReciprocalBestHits_PfPfiji_ApApomp" value="outputs_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" /> + <test> + <param name="inputs" ftype="fasta" 
value="inputs_diamond/AcAcaud_Trinity.fasta,inputs_diamond/AmAmphi_Trinity.fasta,inputs_diamond/ApApomp_Trinity.fasta,inputs_diamond/PfPfiji_Trinity.fasta" /> + <param name="e-value" value="1e-5" /> + <param name="method" value="diamond" /> + <output_collection name="output_fasta_dna" type="list"> + <element name="RBH_AcAcaud_AmAmphi_dna"> + <assert_contents> + <has_text text=">Ac5_1/1_1.000_160"/> + <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/> + <has_text text=">Am1_1/1_1.000_160"/> + <has_text text=">Ac7_1/1_1.000_160"/> + <has_text text="GCACCTAGAATTACCCGAAGTTGCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGCAGGAAGAAGG"/> + <has_text text=">Am3_1/1_1.000_160"/> + <has_text text=">Ac6_1/1_1.000_160"/> + <has_text text="CAGCCTACCACTGAGAAGAGATACTTCAACATGTCTTACTGGGGTAGAAGTGGTGGTCGTACAGCGGGTGGTA"/> + <has_text text=">Am2_1/1_1.000_160"/> + </assert_contents> + </element> + <element name="RBH_AcAcaud_ApApomp_dna"> + <assert_contents> + <has_text text=">Ac23_1/1_1.000_366"/> + <has_text text="ACTGAGGCTCGAACACAACAAAAAGGTGATGAGCAGTCAGCACTTAATAGAATATTACAGCAAGTAGCCAGTAA"/> + <has_text text=">Ap46_1/1_1.000_217"/> + <has_text text="CCACAACCAAGTGGCGGGTATAACAGCACCGAGGCTCGAACACAGCAAAAAGGTGATGAGCAGTCAGCTCTTAA"/> + </assert_contents> + </element> + <element name="RBH_AcAcaud_PfPfiji_dna"> + <assert_contents> + <has_text text=">Ac8_1/1_1.000_160"/> + <has_text text="ATCAAAGAAGAGCAACATCGAGCTACTGGCACTGGCAATGGAATCCTAATTATAGCAGAAACAAGCACTGGTTG"/> + <has_text text=">Pf8_1/1_1.000_160"/> + <has_text text=">Ac5_1/1_1.000_160"/> + <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCAC"/> + <has_text text=">Pf7_1/1_1.000_160"/> + </assert_contents> + </element> + <element name="RBH_AmAmphi_PfPfiji_dna"> + <assert_contents> + <has_text text=">Am8_1/1_1.000_160"/> + <has_text text="GTATTAATAAAAGGACAAGACTATTATTTAATACCAAGAAATCTGGCCTTAATAAGCATGGTTGCTTATATCAT"/> + <has_text text=">Pf9_1/1_1.000_160"/> + <has_text 
text=">Am1_1/1_1.000_160"/> + <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/> + <has_text text=">Pf7_1/1_1.000_160"/> + </assert_contents> + </element> + <element name="RBH_ApApomp_PfPfiji_dna"> + <assert_contents> + <has_text text=">Ap2_1/1_1.000_160"/> + <has_text text="ATACTCAGGCACACAGCATTTGTCGTACTAGGCGAGAGAGAGAGAGGAACGACTAATTGCAACCACGATTA"/> + <has_text text=">Pf5_1/1_1.000_160"/> + <!-- + <has_text text=">Ap1_1/1_1.000_160"/> + <has_text text="GGTCGCCTTATAAAAACCAATCCGAAACAGTTTTCCTTTGAAACGTGCCAAAAACCAAAAACATACTTCAA"/> + <has_text text=">Pf4_1/1_1.000_160"/> + <has_text text=">Ap4_1/1_1.000_160"/> + <has_text text="CGGCCGCGGCGCGTCGTTCTCAGCCAAGCTGACTTCGACTTGAGCCGTCCATTCGCTTATTTACACGACGA"/> + <has_text text=">Pf10_1/1_1.000_160"/> + <has_text text=">Ap3_1/1_1.000_160"/> + <has_text text="GCCATGCAGTACACTGGACTTCTGTTATTCTGTTTGTTTGCCTTGACGGCAGCCAAACCCGCGGAAGACCT"/> + <has_text text=">Pf6_1/1_1.000_160"/> + --> + </assert_contents> + </element> </output_collection> - <output_collection name="output_fasta_dna" type="list"> - <element name="DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud.fasta" /> - <element name="DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AcAcaud" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AcAcaud.fasta" /> - <element name="DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AmAmphi" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AmAmphi.fasta" /> - <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud.fasta" /> - <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi.fasta" /> - <element 
name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp.fasta" /> - </output_collection> - </test> - </tests> + </test> + </tests> - <help> + <help> @HELP_AUTHORS@ @@ -119,6 +189,7 @@ - 'Input files' : a collection of fasta files (one file per species) - 'e_value' : the blast e-value. By default it's 1e-5. + - 'Alignment tool' : choose the sequence alignment tool, either tblastx or Diamond. tblastx is more sensitive and Diamond is much faster. -------- @@ -129,12 +200,6 @@ - 'Pairwise_DNA' : the output which contains nucleic sequences (of the pairwise) that are homologous. The sequences are given as nucleotides. For both the query and the match, it shows : the name and the sequence in nucleotides - - - 'Pairwise_PROT' : the output which contains proteic sequences (of the pairwise) that are homologous. The sequences are with protein. It shows : - Name, position, length, and part of the sequence in protein for query and match sequences - Divergence - Number of gaps - Real divergence -------- @@ -147,6 +212,10 @@ Changelog --------- +**Version 2.1 - 03/07/2018** + + - Add the possibility to use Diamond instead of tblastx + **Version 2.0 - 18/04/2017** - NEW: Replace the zip between tools by Dataset Collection
