diff pairwise.xml @ 9:1e0c547d88fe draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
author abims-sbr
date Tue, 03 Jul 2018 10:52:31 -0400
parents f1ee838a8966
children ae65822c1fb7
line wrap: on
line diff
--- a/pairwise.xml	Wed Feb 28 10:37:14 2018 -0500
+++ b/pairwise.xml	Tue Jul 03 10:52:31 2018 -0400
@@ -1,109 +1,179 @@
-<tool name="Pairwise" id="pairwise" version="2.0.2">
+<tool name="Pairwise" id="pairwise" version="2.1.1">
 
-	<description>
-		Run reciproque tblastx pairwise
-	</description>
+    <description>
+        Find homologous couples by blast with RBH
+    </description>
 
-	<macros>
-		<import>macros.xml</import>
-	</macros>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
 
-	<requirements>
-		<expand macro="python_required" />
-		<requirement type="package" version="2.2.22">blast-legacy</requirement>
-		<requirement type="package" version="1.3.1">samtools</requirement>
-	</requirements>
+    <requirements>
+        <expand macro="python_required" />
+        <requirement type="package" version="1.65">biopython</requirement>
+        <requirement type="package" version="0.9.22">diamond</requirement>
+        <requirement type="package" version="2.2.22">blast-legacy</requirement>      
+    </requirements>
 
-  	<command><![CDATA[
+    <command><![CDATA[
         #set $infiles = ""
         #for $input in $inputs
             ln -s '$input' '$input.element_identifier';
             #set $infiles = $infiles + $input.element_identifier + ","
         #end for
         #set $infiles = $infiles[:-1]
+        
+        ln -s '$__tool_directory__/scripts/S02_04_keep_one_hit_from_blast.py' . &&
+        ln -s '$__tool_directory__/scripts/S03_run_second_blast.py' . &&
+        ln -s '$__tool_directory__/scripts/S05_find_rbh.py' . &&
 
-        ln -s $__tool_directory__/scripts/functions.py . &&
-        ln -s $__tool_directory__/scripts/S02_xxx_patron_pipeline.sh . &&
-        ln -s $__tool_directory__/scripts/S03_run_blast_with_k_filter.sh . &&
-        ln -s $__tool_directory__/scripts/S04_run_blast2_with_k_filter.sh . &&
-        ln -s $__tool_directory__/scripts/S05_script_extract_match_v20_blastx.py . &&
-        ln -s $__tool_directory__/scripts/S06_post_processing_of_pairwise.py . &&
-        ln -s $__tool_directory__/scripts/S07_format_match_get_back_nucleotides.py . &&
-        ln -s $__tool_directory__/scripts/S08_script_extract_match_v20_blastx.py . &&
-        ln -s $__tool_directory__/scripts/S09_post_processing_of_pairwise.py . &&
-        ln -s $__tool_directory__/scripts/S10_compare_list_pairs_for_reciprocal_best_hits_test.py . &&
-        ln -s $__tool_directory__/scripts/S11_post_processing_of_pairwise.py . &&
-        ln -s $__tool_directory__/scripts/S12_prot2dna.py . &&
+        python -W ignore '$__tool_directory__/scripts/S01_run_first_blast.py' '$infiles' '${e_value}' '$method'
+        > '${output}';
+    ]]></command>
 
-		python $__tool_directory__/scripts/S01_organize_rbh.py $infiles ${e_value} \${GALAXY_SLOTS:-1}
-		> ${output};
-  	]]></command>
+    <inputs>
+        <param name="inputs" type="data" format="fasta" multiple="true" label="Input fasta files" />
+        <param name="method" type="select" label="Alignment tool to use" help="tblastx is slow and sensitive, diamond is a lot quicker and less sensitive">
+            <option value="tblastx">tblastx</option>
+            <option value="diamond">Diamond</option>
+        </param>
+        <param name="e_value" type="float" value="1e-5" label="e_value" help="e-value for blast." />
+    </inputs>
+
+    <outputs>
+        <data format="txt" name="output" label="Pairwise" />
+        <collection name="output_fasta_dna" type="list" label="Pairwise_DNA">
+            <discover_datasets pattern="__name_and_ext__" directory="outputs_RBH_dna" />
+        </collection>
+    </outputs>
 
- 	<inputs>
-        <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" />
-		<param name="e_value" type="float" value="1e-5" label="e_value" help="By default, it's 1e-5. you can write the e-value like this: 1e-5" />
-	</inputs>
-
-	<outputs>
-		<data format="txt" name="output" label="Pairwise" />
-        <collection name="output_fasta_dna" type="list" label="Pairwise DNA">
-            <discover_datasets pattern="__name_and_ext__" directory="outputs_dna" />
-        </collection>
-        <collection name="output_fasta_prot" type="list" label="Pairwise PROT">
-            <discover_datasets pattern="__name_and_ext__" directory="outputs_prot" />
-        </collection>
-	</outputs>
-
-	<tests>
+    <tests>
         <test>
-            <param name="inputs" ftype="fasta" value="inputs2/PfPfiji_trinity.fasta,inputs2/ApApomp_trinity.fasta,inputs2/AmAmphi_trinity.fasta,inputs2/AcAcaud_trinity.fasta" />
+            <param name="inputs" ftype="fasta" value="inputs_tblastx/AcAcaud_trinity.fasta,inputs_tblastx/AmAmphi_trinity.fasta,inputs_tblastx/ApApomp_trinity.fasta,inputs_tblastx/PfPfiji_trinity.fasta" />
             <param name="e-value" value="1e-5" />
+            <param name="method" value="tblastx" />
             <output_collection name="output_fasta_dna" type="list">
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud.fasta" />                
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud.fasta" />                
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi.fasta" />
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp" value="outputs_dna2/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp.fasta" />
+                <element name="RBH_AcAcaud_AmAmphi_dna">
+                    <assert_contents>
+                        <has_text text=">Ac5_1/1_1.000_160"/>
+                        <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/>
+                        <has_text text=">Am1_1/1_1.000_160"/>
+                        <has_text text=">Ac7_1/1_1.000_160"/>
+                        <has_text text="GCACCTAGAATTACCCGAAGTTGCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGCAGGAAGAAGG"/>
+                        <has_text text=">Am3_1/1_1.000_160"/>
+                        <has_text text=">Ac6_1/1_1.000_160"/>
+                        <has_text text="CAGCCTACCACTGAGAAGAGATACTTCAACATGTCTTACTGGGGTAGAAGTGGTGGTCGTACAGCGGGTGGTA"/>
+                        <has_text text=">Am2_1/1_1.000_160"/>
+                    </assert_contents>
+                </element>
+                <element name="RBH_AcAcaud_PfPfiji_dna">
+                    <assert_contents>
+                        <has_text text=">Ac8_1/1_1.000_160"/>
+                        <has_text text="ATCAAAGAAGAGCAACATCGAGCTACTGGCACTGGCAATGGAATCCTAATTATAGCAGAAACAAGCACTGGTTG"/>
+                        <has_text text=">Pf8_1/1_1.000_160"/>
+                        <has_text text=">Ac5_1/1_1.000_160"/>
+                        <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCAC"/>
+                        <has_text text=">Pf7_1/1_1.000_160"/>
+                    </assert_contents>
+                </element>
+                <element name="RBH_ApApomp_PfPfiji_dna">
+                    <assert_contents>
+                        <has_text text=">Ap2_1/1_1.000_160"/>
+                        <has_text text="ATACTCAGGCACACAGCATTTGTCGTACTAGGCGAGAGAGAGAGAGGAACGACTAATTGCAACCACGATTA"/>
+                        <has_text text=">Pf5_1/1_1.000_160"/>
+                        <has_text text=">Ap1_1/1_1.000_160"/>
+                        <has_text text="GGTCGCCTTATAAAAACCAATCCGAAACAGTTTTCCTTTGAAACGTGCCAAAAACCAAAAACATACTTCAA"/>
+                        <has_text text=">Pf4_1/1_1.000_160"/>
+                        <has_text text=">Ap4_1/1_1.000_135"/>
+                        <has_text text="CGGCCGCGGCGCGTCGTTCTCAGCCAAGCTGACTTCGACTTGAGCCGTCCATTCGCTTATTTACACGACGA"/>
+                        <has_text text=">Pf10_1/1_1.000_160"/>
+                        <has_text text=">Ap3_1/1_1.000_160"/>
+                        <has_text text="GCCATGCAGTACACTGGACTTCTGTTATTCTGTTTGTTTGCCTTGACGGCAGCCAAACCCGCGGAAGACCT"/>
+                        <has_text text=">Pf6_1/1_1.000_160"/>
+                    </assert_contents>
+                </element>
+                <element name="RBH_AmAmphi_PfPfiji_dna">
+                    <assert_contents>
+                        <has_text text=">Am8_1/1_1.000_160"/>
+                        <has_text text="GTATTAATAAAAGGACAAGACTATTATTTAATACCAAGAAATCTGGCCTTAATAAGCATGGTTGCTTATATCAT"/>
+                        <has_text text=">Pf9_1/1_1.000_160"/>
+                        <has_text text=">Am1_1/1_1.000_160"/>
+                        <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/>
+                        <has_text text=">Pf7_1/1_1.000_160"/>
+                    </assert_contents>
+                </element>
             </output_collection>
         </test>
-		<test>
-            <param name="inputs" ftype="fasta" value="inputs/PfPfiji_Trinity.fasta,inputs/ApApomp_Trinity.fasta,inputs/AmAmphi_Trinity.fasta,inputs/AcAcaud_Trinity.fasta" />
-			<param name="e-value" value="1e-5" />
-			<output name="output" >
-				<assert_contents>
-					<has_line line="('PfPfiji_Trinity.fasta', 'ApApomp_Trinity.fasta')"/>
-					<has_line line="('PfPfiji_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
-					<has_line line="('PfPfiji_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
-					<has_line line="('ApApomp_Trinity.fasta', 'AmAmphi_Trinity.fasta')"/>
-					<has_line line="('ApApomp_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
-					<has_line line="('AmAmphi_Trinity.fasta', 'AcAcaud_Trinity.fasta')"/>
 
-					<has_text text="Number of pairwises parsed = 2" />
-					<has_text text="Number of pairwises parsed = 3" />
-					<has_text text="Number of pairwises parsed = 0" />
-					<has_text text="Number of pairwises parsed = 5" />
-					<has_text text="Number of pairwises parsed = 1" />
-				</assert_contents>
-			</output>
-            <output_collection name="output_fasta_prot" type="list">
-                <element name="ReciprocalBestHits_AmAmphi_AcAcaud" value="outputs_prot/ReciprocalBestHits_AmAmphi_AcAcaud.fasta" />
-                <element name="ReciprocalBestHits_ApApomp_AcAcaud" value="outputs_prot/ReciprocalBestHits_ApApomp_AcAcaud.fasta" />
-                <element name="ReciprocalBestHits_ApApomp_AmAmphi" value="outputs_prot/ReciprocalBestHits_ApApomp_AmAmphi.fasta" />
-                <element name="ReciprocalBestHits_PfPfiji_AcAcaud" value="outputs_prot/ReciprocalBestHits_PfPfiji_AcAcaud.fasta" />
-                <element name="ReciprocalBestHits_PfPfiji_AmAmphi" value="outputs_prot/ReciprocalBestHits_PfPfiji_AmAmphi.fasta" />
-                <element name="ReciprocalBestHits_PfPfiji_ApApomp" value="outputs_prot/ReciprocalBestHits_PfPfiji_ApApomp.fasta" />
+        <test>
+            <param name="inputs" ftype="fasta" value="inputs_diamond/AcAcaud_Trinity.fasta,inputs_diamond/AmAmphi_Trinity.fasta,inputs_diamond/ApApomp_Trinity.fasta,inputs_diamond/PfPfiji_Trinity.fasta" />
+            <param name="e_value" value="1e-5" />
+            <param name="method" value="diamond" />
+            <output_collection name="output_fasta_dna" type="list">
+                <element name="RBH_AcAcaud_AmAmphi_dna">
+                    <assert_contents>
+                        <has_text text=">Ac5_1/1_1.000_160"/>
+                        <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/>
+                        <has_text text=">Am1_1/1_1.000_160"/>
+                        <has_text text=">Ac7_1/1_1.000_160"/>
+                        <has_text text="GCACCTAGAATTACCCGAAGTTGCTTGGCAATAGCGACACCTAACGGTCGCCATGATATTTGCAGGAAGAAGG"/>
+                        <has_text text=">Am3_1/1_1.000_160"/>
+                        <has_text text=">Ac6_1/1_1.000_160"/>
+                        <has_text text="CAGCCTACCACTGAGAAGAGATACTTCAACATGTCTTACTGGGGTAGAAGTGGTGGTCGTACAGCGGGTGGTA"/>
+                        <has_text text=">Am2_1/1_1.000_160"/>
+                    </assert_contents>
+                </element>
+                <element name="RBH_AcAcaud_ApApomp_dna">
+                    <assert_contents>
+                        <has_text text=">Ac23_1/1_1.000_366"/>
+                        <has_text text="ACTGAGGCTCGAACACAACAAAAAGGTGATGAGCAGTCAGCACTTAATAGAATATTACAGCAAGTAGCCAGTAA"/>
+                        <has_text text=">Ap46_1/1_1.000_217"/>
+                        <has_text text="CCACAACCAAGTGGCGGGTATAACAGCACCGAGGCTCGAACACAGCAAAAAGGTGATGAGCAGTCAGCTCTTAA"/>
+                    </assert_contents>
+                </element>
+                <element name="RBH_AcAcaud_PfPfiji_dna">
+                    <assert_contents>
+                        <has_text text=">Ac8_1/1_1.000_160"/>
+                        <has_text text="ATCAAAGAAGAGCAACATCGAGCTACTGGCACTGGCAATGGAATCCTAATTATAGCAGAAACAAGCACTGGTTG"/>
+                        <has_text text=">Pf8_1/1_1.000_160"/>
+                        <has_text text=">Ac5_1/1_1.000_160"/>
+                        <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCAC"/>
+                        <has_text text=">Pf7_1/1_1.000_160"/>
+                    </assert_contents>
+                </element>
+                <element name="RBH_AmAmphi_PfPfiji_dna">
+                    <assert_contents>
+                        <has_text text=">Am8_1/1_1.000_160"/>
+                        <has_text text="GTATTAATAAAAGGACAAGACTATTATTTAATACCAAGAAATCTGGCCTTAATAAGCATGGTTGCTTATATCAT"/>
+                        <has_text text=">Pf9_1/1_1.000_160"/>
+                        <has_text text=">Am1_1/1_1.000_160"/>
+                        <has_text text="GCACCGGGATGCGGATTTGCTGACGATATGGCAAAAGCATTGTCAGCGTGCGGAACCTGTTTATGTCACACCA"/>
+                        <has_text text=">Pf7_1/1_1.000_160"/>
+                    </assert_contents>
+                </element>
+                <element name="RBH_ApApomp_PfPfiji_dna">
+                    <assert_contents>
+                        <has_text text=">Ap2_1/1_1.000_160"/>
+                        <has_text text="ATACTCAGGCACACAGCATTTGTCGTACTAGGCGAGAGAGAGAGAGGAACGACTAATTGCAACCACGATTA"/>
+                        <has_text text=">Pf5_1/1_1.000_160"/>
+                        <!--
+                        <has_text text=">Ap1_1/1_1.000_160"/>
+                        <has_text text="GGTCGCCTTATAAAAACCAATCCGAAACAGTTTTCCTTTGAAACGTGCCAAAAACCAAAAACATACTTCAA"/>
+                        <has_text text=">Pf4_1/1_1.000_160"/>
+                        <has_text text=">Ap4_1/1_1.000_160"/>
+                        <has_text text="CGGCCGCGGCGCGTCGTTCTCAGCCAAGCTGACTTCGACTTGAGCCGTCCATTCGCTTATTTACACGACGA"/>
+                        <has_text text=">Pf10_1/1_1.000_160"/>
+                        <has_text text=">Ap3_1/1_1.000_160"/>
+                        <has_text text="GCCATGCAGTACACTGGACTTCTGTTATTCTGTTTGTTTGCCTTGACGGCAGCCAAACCCGCGGAAGACCT"/>
+                        <has_text text=">Pf6_1/1_1.000_160"/>
+                        -->
+                    </assert_contents>
+                </element>
             </output_collection>
-            <output_collection name="output_fasta_dna" type="list">
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_AmAmphi_AcAcaud.fasta" />
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AcAcaud" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AcAcaud.fasta" />
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AmAmphi" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_ApApomp_AmAmphi.fasta" />
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AcAcaud.fasta" />
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_AmAmphi.fasta" />
-                <element name="DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp" value="outputs_dna/DNAalignment_corresponding_to_protein_from_RBH_PfPfiji_ApApomp.fasta" />
-            </output_collection>
-		</test>
-	</tests>
+        </test>
+    </tests>
 
-	<help>
+    <help>
 
  @HELP_AUTHORS@
  
@@ -119,6 +189,7 @@
 
     - 'Input files' : a collection of fasta files (one file per species)
     - 'e_value' : the blast e-value. By default it's 1e-5.
+    - 'Alignment tool to use' : the sequence alignment tool, either tblastx or Diamond. tblastx is more sensitive; Diamond is much faster.
 
 --------
 
@@ -129,12 +200,6 @@
     - 'Pairwise_DNA' : the output which contains nucleic sequences (of the pairwise) that are homologous. The sequences are with nucleotides. It shows for both the query and match :
         the name        
         the sequence in nucleotides
-   
-    - 'Pairwise_PROT' : the output which contains proteic sequences (of the pairwise) that are homologous. The sequences are with protein. It shows :
-        Name, position, length, and part of the sequence in protein for query and match sequences
-        Divergence
-        Number of gaps 
-        Real divergence
 
 --------
 
@@ -147,6 +212,10 @@
 Changelog
 ---------
 
+**Version 2.1 - 03/07/2018**
+
+ - Add the possibility to use Diamond instead of tblastx
+
 **Version 2.0 - 18/04/2017**
 
  - NEW: Replace the zip between tools by Dataset Collection