Mercurial > repos > abims-sbr > orthogroups_tool
diff orthogroups_tool.xml @ 1:3f862f346967 draft
planemo upload for repository https://github.com/abims-sbr/adaptearch commit cf1b9c905931ca2ca25faa4844d45c908756472f-dirty
| author | abims-sbr |
|---|---|
| date | Wed, 17 Jan 2018 09:02:12 -0500 |
| parents | f3600c96e961 |
| children | d557212b575b |
line wrap: on
line diff
--- a/orthogroups_tool.xml Fri Oct 27 10:38:52 2017 -0400 +++ b/orthogroups_tool.xml Wed Jan 17 09:02:12 2018 -0500 @@ -10,7 +10,8 @@ <requirements> <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.68">biopython</requirement> + <requirement type="package" version="1.12.0">numpy</requirement> + <requirement type="package" version="0.20.0">pandas</requirement> </requirements> <command> @@ -22,33 +23,67 @@ #end for #set $infiles = $infiles[:-1] - $__tool_directory__/scripts/format_transdecoder_headers.sh '$orthogroups' + $__tool_directory__/scripts/format_transdecoder_headers.sh '$orthogroups' tmp_orthogroups.txt && - python $__tool_directory__/scripts/filter_orthofinder.py '$orthogroups' $nbseq > '$output'; + python $__tool_directory__/scripts/filter_orthofinder.py tmp_orthogroups.txt $nbspec $nbseq + #if '$verbosity': + -v + #end if + #if '$paralogs': + -p + #end if + + > '$output'; ]]> </command> <inputs> <param name="orthogroups" type="data" format="txt" label="Select a file of Orthogroups" help="The output from OrthoFinder, 'Orthogroups.txt'."/> <param name="inputs_fasta" type="data" format="fasta" multiple="true" label="Outputs from Filter Assemblies" help="Contains the nucleic sequences to retrieve."/> + <param name="nbspec" type="integer" value="0" label="Number of studied species" help="Specifies the TOTAL (no less) number of species of the analysis"/> <param name="nbseq" type="integer" value="3" label="Minimal number of sequences per orthogroup" help="Drop orthogroups with less than x sequences :" /> + <param name="verbosity" type="boolean" label="verbosity" help="Turn on this paramater to display an additional summary table (computed before the pralogous filtering)" /> + <param name="paralogs" type="boolean" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" /> </inputs> <outputs> - <data format="txt" name="output" 
label="log_orthogroups" /> - <data format="csv" name="countings" label="summary_orthogroups.csv" from_work_dir="summary_orthogroups.csv"/> + <data format="txt" name="output" label="orthogroups_tool.output" /> <collection name="orthogroups_fasta" type="list" label="Orthogroups_fasta_files" > <discover_datasets pattern="__name_and_ext__" directory="filtered_orthogroups" /> </collection> + <collection name="orthogroups_fasta_paralogs" type="list" label="Orthogroups_with_paralogs_fasta_files" > + <discover_datasets pattern="__name_and_ext__" directory="orthogroups_withParalogs" /> + <filter>paralogs == True</filter> + </collection> </outputs> <tests> <test> <param name="orthogroups" value="Orthogroups.txt"/> - <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/PfPfiji_trinity.fasta,output_filterassemblies/PgPgras_trinity.fasta,output_filterassemblies/PhPhess_trinity.fasta,output_filterassemblies/PuPunid_trinity.fasta,output_filterassemblies/TeTerlap_trinity.fasta,output_filterassemblies/ThThelep_trinity.fasta"/> - <param name="nbseq" value="3"/> - <output name="output" value="log_orthogroups.txt" lines_diff="2"/> - <output name="countings" value="summary_orthogroups.csv"/> + <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/> + <param name="nbspec" value="4"/> + <param name="nbseq" value="2"/> + <param name="verbosity" value="True"/> + <param name="paralogs" value="True"/> + <output name="output" value="orthogroups_tool.output" lines_diff="2"/> + <output_collection name="orthogroups_fasta" type="list"> + <element name="orthogroup_1_3_sequences" value="filtered_orthogroups/orthogroup_1_3_sequences.fasta" /> + <element name="orthogroup_2_4_sequences" 
value="filtered_orthogroups/orthogroup_2_4_sequences.fasta" /> + <element name="orthogroup_3_2_sequences" value="filtered_orthogroups/orthogroup_3_2_sequences.fasta" /> + <element name="orthogroup_4_3_sequences" value="filtered_orthogroups/orthogroup_4_3_sequences.fasta" /> + <element name="orthogroup_5_3_sequences" value="filtered_orthogroups/orthogroup_5_3_sequences.fasta" /> + <element name="orthogroup_6_3_sequences" value="filtered_orthogroups/orthogroup_6_3_sequences.fasta" /> + <element name="orthogroup_7_2_sequences" value="filtered_orthogroups/orthogroup_7_2_sequences.fasta" /> + </output_collection> + <output_collection name="orthogroups_fasta_paralogs" type="list"> + <element name="orthogroup_1_8_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_1_8_sequences_withParalogs.fasta" /> + <element name="orthogroup_2_5_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_2_5_sequences_withParalogs.fasta" /> + <element name="orthogroup_3_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_3_3_sequences_withParalogs.fasta" /> + <element name="orthogroup_4_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_4_3_sequences_withParalogs.fasta" /> + <element name="orthogroup_5_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_5_3_sequences_withParalogs.fasta" /> + <element name="orthogroup_6_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_6_3_sequences_withParalogs.fasta" /> + <element name="orthogroup_7_2_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_7_2_sequences_withParalogs.fasta" /> + </output_collection> </test> </tests> @@ -60,7 +95,7 @@ ------------------------------------------- -**OrthoGroups** +**Description** This tool takes Orthogroups found by OrthoFinder and proceeds to retrieve nucleic sequences back, then write each orthogroups in its own fasta file. @@ -70,7 +105,7 @@ .. 
class:: warningmark -This tool is configured to work within the AdaptSearch toolsuite, which implies a specific format of headers. Indeed, due to the use of external tools (like TransDecoder), the genes IDs might have been modified. A regular expression rewrites all the genes IDs in order to have, for instance, this format : >Pf1004_1/1_1.000_369 +This tool is configured to work within the AdaptSearch toolsuite, which implies a specific format of headers. Indeed, due to the use of external tools (like TransDecoder), the genes IDs might have been modified. A regular expression rewrites all the genes IDs in order to have, for instance, this format : >Pf1004_1/1_1.000_369 ------------------------------------------- @@ -86,15 +121,30 @@ - File of Orthogroups : the orthogroups.txt file from OrthoFinder. - Output from Filter_Assemblies (either as multiple datasets or dataset collection) +- Total number of studied species on which the orthogroups have been built. - Minimal number of sequences : The orthogroups with less than the specified number won't be recorded. +- Verbose : If 'Yes', a supplementary table will be displayed in the outputs (countings of species and sequences in orthogroups before the removal of paralogs). +- Paralogs : if 'Yes', there will be a supplementary output of orthogroups file, before the removal of paralogs. ------------------------------------------- **Outputs** - Dataset collection of fasta files : each file represents an orthogroup, each gene within the group has the couple ID-nucleic sequence. -- A csv file with countings. +- If the --paralogs option is checked : another dataset collection of fasta files with all the paralogous genes. - The tool log. + +--------- + +**The AdaptSearch Pipeline** + +.. image:: ../../adaptsearch_picture_helps.png :height: 593 :width: 852 + +Changelog +--------- + +**Version 1.0 - 11/01/2018** + ]]> </help>
