diff orthogroups_tool.xml @ 1:3f862f346967 draft

planemo upload for repository https://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f-dirty
author abims-sbr
date Wed, 17 Jan 2018 09:02:12 -0500
parents f3600c96e961
children d557212b575b
line wrap: on
line diff
--- a/orthogroups_tool.xml	Fri Oct 27 10:38:52 2017 -0400
+++ b/orthogroups_tool.xml	Wed Jan 17 09:02:12 2018 -0500
@@ -10,7 +10,8 @@
 
     <requirements>
         <requirement type="package" version="2.7">python</requirement>
-        <requirement type="package" version="1.68">biopython</requirement>
+        <requirement type="package" version="1.12.0">numpy</requirement>
+        <requirement type="package" version="0.20.0">pandas</requirement>
     </requirements>
     
     <command>
@@ -22,33 +23,67 @@
         #end for
         #set $infiles = $infiles[:-1]
 
-        $__tool_directory__/scripts/format_transdecoder_headers.sh '$orthogroups'
+        $__tool_directory__/scripts/format_transdecoder_headers.sh '$orthogroups' tmp_orthogroups.txt
         &&
-        python $__tool_directory__/scripts/filter_orthofinder.py '$orthogroups' $nbseq > '$output';
+        python $__tool_directory__/scripts/filter_orthofinder.py tmp_orthogroups.txt $nbspec $nbseq               
+        #if $verbosity:
+            -v
+        #end if
+        #if $paralogs:
+            -p
+        #end if
+
+        > '$output';
     ]]> 
     </command>
 
     <inputs>
         <param name="orthogroups" type="data" format="txt" label="Select a file of Orthogroups" help="The output from OrthoFinder, 'Orthogroups.txt'."/>
         <param name="inputs_fasta" type="data" format="fasta" multiple="true" label="Outputs from Filter Assemblies" help="Contains the nucleic sequences to retrieve."/>
+        <param name="nbspec" type="integer" value="0" label="Number of studied species" help="Specifies the TOTAL (no less) number of species of the analysis"/>
         <param name="nbseq" type="integer" value="3" label="Minimal number of sequences per orthogroup" help="Drop orthogroups with less than x sequences :" />
+        <param name="verbosity" type="boolean" label="verbosity" help="Turn on this parameter to display an additional summary table (computed before the paralogous filtering)" />
+        <param name="paralogs" type="boolean" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" />
     </inputs>
 
     <outputs>
-        <data format="txt" name="output" label="log_orthogroups" />
-        <data format="csv" name="countings" label="summary_orthogroups.csv" from_work_dir="summary_orthogroups.csv"/>
+        <data format="txt" name="output" label="orthogroups_tool.output" />        
         <collection name="orthogroups_fasta" type="list" label="Orthogroups_fasta_files" >
             <discover_datasets pattern="__name_and_ext__" directory="filtered_orthogroups" />
         </collection>
+        <collection name="orthogroups_fasta_paralogs" type="list" label="Orthogroups_with_paralogs_fasta_files" >
+            <discover_datasets pattern="__name_and_ext__" directory="orthogroups_withParalogs" />
+            <filter>paralogs == True</filter>
+        </collection>
     </outputs>
 
     <tests>
         <test>
             <param name="orthogroups" value="Orthogroups.txt"/>
-            <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/PfPfiji_trinity.fasta,output_filterassemblies/PgPgras_trinity.fasta,output_filterassemblies/PhPhess_trinity.fasta,output_filterassemblies/PuPunid_trinity.fasta,output_filterassemblies/TeTerlap_trinity.fasta,output_filterassemblies/ThThelep_trinity.fasta"/>
-            <param name="nbseq" value="3"/>
-            <output name="output" value="log_orthogroups.txt" lines_diff="2"/>
-            <output name="countings" value="summary_orthogroups.csv"/>
+            <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/>
+            <param name="nbspec" value="4"/>
+            <param name="nbseq" value="2"/>
+            <param name="verbosity" value="True"/>
+            <param name="paralogs" value="True"/>
+            <output name="output" value="orthogroups_tool.output" lines_diff="2"/>
+            <output_collection name="orthogroups_fasta" type="list">
+                <element name="orthogroup_1_3_sequences" value="filtered_orthogroups/orthogroup_1_3_sequences.fasta" />
+                <element name="orthogroup_2_4_sequences" value="filtered_orthogroups/orthogroup_2_4_sequences.fasta" />
+                <element name="orthogroup_3_2_sequences" value="filtered_orthogroups/orthogroup_3_2_sequences.fasta" />
+                <element name="orthogroup_4_3_sequences" value="filtered_orthogroups/orthogroup_4_3_sequences.fasta" />
+                <element name="orthogroup_5_3_sequences" value="filtered_orthogroups/orthogroup_5_3_sequences.fasta" />
+                <element name="orthogroup_6_3_sequences" value="filtered_orthogroups/orthogroup_6_3_sequences.fasta" />
+                <element name="orthogroup_7_2_sequences" value="filtered_orthogroups/orthogroup_7_2_sequences.fasta" />
+            </output_collection>
+            <output_collection name="orthogroups_fasta_paralogs" type="list">
+                <element name="orthogroup_1_8_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_1_8_sequences_withParalogs.fasta" />
+                <element name="orthogroup_2_5_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_2_5_sequences_withParalogs.fasta" />
+                <element name="orthogroup_3_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_3_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_4_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_4_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_5_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_5_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_6_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_6_3_sequences_withParalogs.fasta" />
+                <element name="orthogroup_7_2_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_7_2_sequences_withParalogs.fasta" />
+            </output_collection>
         </test>
     </tests>
 
@@ -60,7 +95,7 @@
 
 -------------------------------------------
 
-**OrthoGroups**
+**Description**
 
 This tool takes Orthogroups found by OrthoFinder and proceeds to retrieve nucleic sequences back, then write each orthogroups in its own fasta file.
 
@@ -70,7 +105,7 @@
 
 .. class:: warningmark
 
-This tool is configured to work within the AdaptSearch toolsuite, which implies a specific format of headers. Indeed, due to the use of external tools (like TransDecoder), the genes IDs might have been modified. A regular expression rewrites all the genes IDs in order to have, for instance, this format : &gt;Pf1004_1/1_1.000_369
+This tool is configured to work within the AdaptSearch toolsuite, which implies a specific format of headers. Indeed, due to the use of external tools (like TransDecoder), the genes IDs might have been modified. A regular expression rewrites all the genes IDs in order to have, for instance, this format : >Pf1004_1/1_1.000_369
 
 -------------------------------------------
 
@@ -86,15 +121,30 @@
 
 - File of Orthogroups : the orthogroups.txt file from OrthoFinder.
 - Output from Filter_Assemblies (either as multiple datasets or dataset collection)
+- Total number of studied species on which the orthogroups have been built.
 - Minimal number of sequences : The orthogroups with less than the specified number won't be recorded.
+- Verbose : If 'Yes', a supplementary table will be displayed in the outputs (counts of species and sequences in orthogroups before the removal of paralogs).
+- Paralogs : if 'Yes', there will be a supplementary output of orthogroups file, before the removal of paralogs.
 
 -------------------------------------------
 
 **Outputs**
 
 - Dataset collection of fasta files : each file represents an orthogroup, each gene within tthe group has the couple ID-nucleic sequence.
-- A csv file with countings.
+- If the --paralogs option is checked : another dataset collection of fasta files with all the paralogous genes.
 - The tool log.
+
+---------
+
+**The AdaptSearch Pipeline**
+
+.. image:: ../../adaptsearch_picture_helps.png
+    :height: 593
+    :width: 852
+
+Changelog
+---------
+
+**Version 1.0 - 11/01/2018**
+
 ]]>
     </help>