view POGs.xml @ 9:a76e4758dc44 draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 7c9540507076087e858141f9dd5df33409690dac
author abims-sbr
date Wed, 28 Feb 2018 06:04:15 -0500
parents 3fbf4e4d111e
children 05985273b735
line wrap: on
line source

<tool name="POGs" id="POGs" version="2.0.1">

	<!-- One-line summary of the tool. -->
	<description>Find orthologous groups</description>

	<macros>
		<import>macros.xml</import>
	</macros>

	<requirements>
		<!-- Expanded from the "python_required" macro, which is defined outside
		     this file (presumably in the imported macros.xml). -->
		<expand macro="python_required" />
		<!-- Pinned package versions requested in addition to the macro above. -->
		<requirement type="package" version="0.20.0">pandas</requirement>
		<requirement type="package" version="1.12.0">numpy</requirement>
	</requirements>

  	<!--
  	  Command template (Cheetah).
  	  1. Symlinks every selected input dataset into the working directory under
  	     its element identifier.
  	  2. Calls scripts/pogs.py with the comma-separated list of those link names
  	     and the minimum species count, adding -v / -p when the matching
  	     boolean parameter is set to its true value ("T").
  	  3. Redirects the script's standard output to the "log" dataset.
  	  The steps are chained with "&&" so that a failed symlink aborts the job
  	  instead of letting pogs.py run on an incomplete set of files.
  	-->
  	<command>
	<![CDATA[
		## Collect the link names while creating the symlinks.
		#set $link_names = []
		#for $pairwise_file in $inputs_from_paiwise_dna
			ln -s '$pairwise_file' '$pairwise_file.element_identifier' &&
			#silent $link_names.append(str($pairwise_file.element_identifier))
		#end for
		## pogs.py receives all link names as one comma-separated argument.
		#set $pairwise_names = ",".join($link_names)

		python '$__tool_directory__/scripts/pogs.py' '$pairwise_names' '$minspec'

		#if str($verbose) == "T"
			-v
		#end if
		#if str($paralogs) == "T"
			-p
		#end if

		## Quote the redirect target so the dataset path is passed as one token.
		> '$log'

	]]>
  	</command>

 	<inputs>
		<!-- FASTA datasets to process (multiple selection). The parameter name
		     keeps its historical spelling ("paiwise") because the command
		     template and the tests refer to it by this name. -->
		<param name="inputs_from_paiwise_dna"
			type="data"
			format="fasta"
			multiple="true"
			label="Input files from Pairwise DNA" />

		<!-- Passed to pogs.py as its second positional argument. -->
		<param name="minspec"
			type="integer"
			value="3"
			label="Drop orthogroups with less than n species" />

		<!-- Adds the -v flag to the command line when enabled. -->
		<param name="verbose"
			type="boolean"
			checked="true"
			truevalue="T"
			falsevalue="F"
			label="Verbose"
			help="Display a summary table of orthogroups before paralogs filtering" />

		<!-- Adds the -p flag to the command line when enabled. -->
		<param name="paralogs"
			type="boolean"
			checked="false"
			truevalue="T"
			falsevalue="F"
			label="Paralogs"
			help="Return also orthogroups without paralogs filtering" />
	</inputs>

	<outputs>
		<!-- Receives the standard output of pogs.py (see the redirect in the command). -->
		<data name="log" format="txt" label="pogs.output" />

		<!-- Datasets discovered in the "outputs" directory of the job. -->
		<collection name="output" type="list" label="POGs_unaligned">
			<discover_datasets pattern="__name_and_ext__" directory="outputs" />
		</collection>

		<!-- Datasets discovered in "outputs_withParalogs"; only created when
		     the "paralogs" parameter is enabled. -->
		<collection name="outputPara" type="list" label="POGs_withParalogs_unaligned">
			<filter>paralogs == True</filter>
			<discover_datasets pattern="__name_and_ext__" directory="outputs_withParalogs" />
		</collection>
	</outputs>

	<tests>
		<!-- Test 1: verbose and paralogs both enabled; checks the log and both
		     output collections. -->
		<test>
			<param name="inputs_from_paiwise_dna" ftype="fasta" value="output_pairwise/RBH_AmAmphi_AcAcaud.fasta,output_pairwise/RBH_ApApomp_AcAcaud.fasta,output_pairwise/RBH_ApApomp_AmAmphi.fasta,output_pairwise/RBH_TeTerlap_AcAcaud.fasta,output_pairwise/RBH_TeTerlap_AmAmphi.fasta,output_pairwise/RBH_TeTerlap_ApApomp.fasta"/>
			<param name="minspec" value="3"/>
			<param name="verbose" value="True"/>
			<param name="paralogs" value="True"/>
			<output name="log" value="pogs.output" />
			<output_collection name="output" type="list">
				<element name="orthogroup_1_with_4_sequences" value="outputs/orthogroup_1_with_4_sequences.fasta" />
				<element name="orthogroup_2_with_4_sequences" value="outputs/orthogroup_2_with_4_sequences.fasta" />
				<element name="orthogroup_3_with_3_sequences" value="outputs/orthogroup_3_with_3_sequences.fasta" />
				<element name="orthogroup_4_with_4_sequences" value="outputs/orthogroup_4_with_4_sequences.fasta" />
				<element name="orthogroup_5_with_3_sequences" value="outputs/orthogroup_5_with_3_sequences.fasta" />
				<element name="orthogroup_6_with_4_sequences" value="outputs/orthogroup_6_with_4_sequences.fasta" />
				<element name="orthogroup_7_with_3_sequences" value="outputs/orthogroup_7_with_3_sequences.fasta" />
				<element name="orthogroup_8_with_3_sequences" value="outputs/orthogroup_8_with_3_sequences.fasta" />
				<element name="orthogroup_9_with_3_sequences" value="outputs/orthogroup_9_with_3_sequences.fasta" />
				<element name="orthogroup_10_with_3_sequences" value="outputs/orthogroup_10_with_3_sequences.fasta" />
			</output_collection>
			<output_collection name="outputPara" type="list">
				<element name="orthogroup_1_with_13_sequences_withParalogs" value="outputs_withParalogs/orthogroup_1_with_13_sequences_withParalogs.fasta" />
				<element name="orthogroup_2_with_7_sequences_withParalogs" value="outputs_withParalogs/orthogroup_2_with_7_sequences_withParalogs.fasta" />
				<element name="orthogroup_3_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_3_with_3_sequences_withParalogs.fasta" />
				<element name="orthogroup_4_with_7_sequences_withParalogs" value="outputs_withParalogs/orthogroup_4_with_7_sequences_withParalogs.fasta" />
				<element name="orthogroup_5_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_5_with_3_sequences_withParalogs.fasta" />
				<element name="orthogroup_6_with_4_sequences_withParalogs" value="outputs_withParalogs/orthogroup_6_with_4_sequences_withParalogs.fasta" />
				<element name="orthogroup_7_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_7_with_3_sequences_withParalogs.fasta" />
				<element name="orthogroup_8_with_5_sequences_withParalogs" value="outputs_withParalogs/orthogroup_8_with_5_sequences_withParalogs.fasta" />
				<element name="orthogroup_9_with_3_sequences_withParalogs" value="outputs_withParalogs/orthogroup_9_with_3_sequences_withParalogs.fasta" />
				<element name="orthogroup_10_with_4_sequences_withParalogs" value="outputs_withParalogs/orthogroup_10_with_4_sequences_withParalogs.fasta" />
			</output_collection>
		</test>

		<!-- Test 2: same inputs with paralogs disabled; checks the log and the
		     main output collection only. -->
		<test>
			<param name="inputs_from_paiwise_dna" ftype="fasta" value="output_pairwise/RBH_AmAmphi_AcAcaud.fasta,output_pairwise/RBH_ApApomp_AcAcaud.fasta,output_pairwise/RBH_ApApomp_AmAmphi.fasta,output_pairwise/RBH_TeTerlap_AcAcaud.fasta,output_pairwise/RBH_TeTerlap_AmAmphi.fasta,output_pairwise/RBH_TeTerlap_ApApomp.fasta"/>
			<param name="minspec" value="3"/>
			<param name="verbose" value="True"/>
			<param name="paralogs" value="False"/>
			<output name="log" value="2_pogs.output" />
			<output_collection name="output" type="list">
				<element name="orthogroup_1_with_4_sequences" value="outputs/orthogroup_1_with_4_sequences.fasta" />
				<element name="orthogroup_2_with_4_sequences" value="outputs/orthogroup_2_with_4_sequences.fasta" />
				<element name="orthogroup_3_with_3_sequences" value="outputs/orthogroup_3_with_3_sequences.fasta" />
				<element name="orthogroup_4_with_4_sequences" value="outputs/orthogroup_4_with_4_sequences.fasta" />
				<element name="orthogroup_5_with_3_sequences" value="outputs/orthogroup_5_with_3_sequences.fasta" />
				<element name="orthogroup_6_with_4_sequences" value="outputs/orthogroup_6_with_4_sequences.fasta" />
				<element name="orthogroup_7_with_3_sequences" value="outputs/orthogroup_7_with_3_sequences.fasta" />
				<element name="orthogroup_8_with_3_sequences" value="outputs/orthogroup_8_with_3_sequences.fasta" />
				<element name="orthogroup_9_with_3_sequences" value="outputs/orthogroup_9_with_3_sequences.fasta" />
				<element name="orthogroup_10_with_3_sequences" value="outputs/orthogroup_10_with_3_sequences.fasta" />
			</output_collection>
		</test>
	</tests>

	<help>

@HELP_AUTHORS@    

<![CDATA[

**Last Version** : Victor Mataigne and Gildas Le Corguillé

**Description**

This tool parses the homologous sequences produced by the 'Pairwise' tool and gathers them into groups of orthologous sequences.

--------

**Inputs**

The output of the tool 'Pairwise' (pairwise_DNA, nucleic format) (AdaptSearch suite), which consists of lists of homologous sequences between pairs of species obtained by RBH comparisons.

--------

**Parameters**

- Species minimum : orthogroups containing fewer than n species are dropped and will not be recorded.

- verbose : check 'Yes' if you want a supplementary table with the counts of orthogroups before the removal of paralogous sequences.

- paralogs : check 'Yes' if you also want to retrieve the orthogroups before the paralogous sequences removal.

.. class:: warningmark

The minimum number of species per group only applies to the main output files. With the 'paralogs' option, this parameter acts more like a minimum number of sequences to keep per group.

--------

**Outputs**

This tool produces the following files:

- pogs.output : the general output, displaying count tables of orthogroups.

- POGs_unaligned : a dataset collection which contains the groups of orthologous sequences (one group per file) in nucleic format, with paralogous sequences removed (only one sequence is kept per species and per group)

- POGs_withParalogs_unaligned : a similar dataset collection in which paralogous sequences are kept. This output is available if you check 'Yes' for the 'paralogs' option.

---------

**The AdaptSearch Pipeline**

.. image:: adaptsearch_picture_helps.png

Changelog
---------

**Version 2.1 - 04/01/2018**

  - Rewritten with improved results and better code
  - Added options verbose and paralogs

**Version 2.0 - 11/07/2017**

 - Replace the zip between tools by Dataset Collection
 - Add option : paralogs filtering
 - Added a second functional test (to test the new option)
 - Code factoring
 - Removed zip format of input and output files ; replaced by dataset collections


**Version 1.0 - 13/04/2017**

 - TEST: Add functional test with planemo
 - IMPROVEMENT: Use conda dependencies for python

    ]]>

	</help>

	<expand macro="citations" />

</tool>