view ConcatPhyl.xml @ 9:ff1a3a790363 draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit f1ba8d136e0129f3e8435b25a95f70f697d51464-dirty
author abims-sbr
date Tue, 03 Jul 2018 10:55:20 -0400
parents ad3446d7f794
children 5cae58aa1181
line wrap: on
line source

<tool name="ConcatPhyl" id="concatphyl" version="2.0.2">

	<!-- Short tool description. -->
	<description>Concatenation and phylogeny</description>

	<!-- Macro definitions are pulled in from macros.xml. -->
	<macros>
		<import>macros.xml</import>
	</macros>

	<!-- Dependencies: whatever the python_required macro expands to, plus RAxML 8.2.9. -->
	<requirements>
		<expand macro="python_required"/>
		<requirement type="package" version="8.2.9">raxml</requirement>
	</requirements>

  	<command><![CDATA[
        ## Symlink each "Filter assemblies" dataset under its element identifier and
        ## collect those names into one comma-separated argument for the script.
        #set $infiles_filter_assemblies = ""
        #for $input_filter_assemblie in $input_filter_assemblies
            ln -s '$input_filter_assemblie' '$input_filter_assemblie.element_identifier';
            #set $infiles_filter_assemblies = $infiles_filter_assemblies + $input_filter_assemblie.element_identifier + ","
        #end for
        ## Strip the trailing comma left by the loop above.
        #set $infiles_filter_assemblies = $infiles_filter_assemblies[:-1]

        ## Symlink the alignments and record their names, one per line, in list_files.
        #for $input_alignment in $input_alignments
            ln -s '$input_alignment' '$input_alignment.element_identifier';
            echo '$input_alignment.element_identifier' >> list_files;
        #end for

        ## Concatenation step. The script path, the species list and the output
        ## path are quoted so that a value containing spaces stays one argument.
        ## format_run is either "nucleic" or "proteic" and is passed through as is.
        python '$__tool_directory__/scripts/S01_concatenate.py'
        '$infiles_filter_assemblies'
        $format.format_run
        list_files
        > '$output';

        ## Normalise the optional parameters once. An unset value is treated as
        ## either an empty string or the literal None.
        #set $bootstop = str($number_of_runs_bootstop).strip()
        #set $runs = str($number_of_runs).strip()
        #set $bs_seed = str($rapid_bootstrap_random_seed).strip()
        #set $algorithm = str($search_algorithm).strip()

        ## A bootstopping criterion, when chosen, takes precedence over the
        ## numeric number of runs.
        #set $replicates = ""
        #if $bootstop not in ("", "None"):
            #set $replicates = $bootstop
        #elif $runs not in ("", "None"):
            #set $replicates = $runs
        #end if

        raxmlHPC -n galaxy_run
        #if $format.format_run == "nucleic":
            -s 03_Concatenation_nuc.phy
            -m $format.base_model
        #elif $format.format_run == "proteic":
            -s 02_Concatenation_aa.phy
            -m ${format.base_model}${format.aa_search_matrix}
        #end if

        ## Optional user-supplied partition/model file.
        #if str($multiple_model) != "None":
            -q '$multiple_model'
        #end if

        -p $random_seed

        #if $replicates != "":
            -N $replicates
            ## Only pass -x when a seed was supplied: with an empty value the
            ## flag would consume the next option as its argument.
            #if $bs_seed not in ("", "None"):
                -x $bs_seed
            #end if
        #end if

        #if $algorithm not in ("", "None"):
            -f $algorithm
        #end if

        >> '$output';
    ]]>
  	</command>

 	<inputs>

		<!-- Input datasets: per-species fasta files and the alignments to concatenate. -->
		<param name="input_filter_assemblies" type="data" format="fasta" multiple="true" label="Files from Filter assemblies" />
		<param name="input_alignments" type="data" format="fasta" multiple="true" label="Aligned files without indels" help="nucleic or proteic format according to the analysis you want to do below" />

		<!-- Sequence type; selects the concatenation mode and the RAxML model family. -->
		<conditional name="format">
			<param name="format_run" type="select" label="Which format do you want to use for this tool (concatenation and RAxML run) ? ">
				<option value="nucleic">Nucleic format</option>
				<option value="proteic">Proteic format</option>
			</param>

			<when value="nucleic">
				<param name="base_model" type="select" label="Substitution Model">
					<option value="GTRCAT">GTRCAT</option>
					<option value="GTRCATI">GTRCATI</option>
					<option value="GTRGAMMA" selected="true">GTRGAMMA</option>
					<option value="GTRGAMMAI">GTRGAMMAI</option>
				</param>
			</when>

			<when value="proteic">
				<param name="base_model" type="select" label="Substitution Model (-m)">
					<option value="PROTCAT" selected="true">PROTCAT</option>
					<option value="PROTCATI">PROTCATI</option>
					<option value="PROTGAMMA">PROTGAMMA</option>
					<option value="PROTGAMMAI">PROTGAMMAI</option>
				</param>
				<!-- Appended to base_model to form the full -m value. -->
				<param name="aa_search_matrix" type="select" label="Matrix">
					<option value="DAYHOFF" selected="true">DAYHOFF</option>
					<option value="JTT">JTT</option>
					<option value="WAG">WAG</option>
					<option value="BLOSUM62">BLOSUM62</option>
				</param>
			</when>
		</conditional>

		<!-- (-p) -->
		<param name="random_seed" type="integer" value="1234567890" size="12" label="Random seed used for the parsimony inferences" />

		<!-- (-N/#): the help text is kept on one line so that no line breaks or indentation end up inside the attribute value. -->
		<param name="number_of_runs" type="integer" size="8" value="100" optional="true" label="Number of runs" help="Specify the number of alternative runs (-N|#) on distinct starting trees. In combination with the '-b' option, this will invoke a multiple bootstrap analysis. You can add the bootstopping criteria by choosing the autoMR, autoMRE, autoMRE_IGN, or autoFC value in a menu below instead of providing a number here. Bootstopping will only work in combination with '-x' or '-b'." />
		<param name="number_of_runs_bootstop" type="select" label="Use bootstopping criteria for number of runs" optional="true">
			<option value="" selected="true"></option>
			<option value="autoMR">autoMR</option>
			<option value="autoMRE">autoMRE</option>
			<option value="autoMRE_IGN">autoMRE_IGN</option>
			<option value="autoFC">autoFC</option>
		</param>

		<!-- (-f): option "a" is preselected here; "d" is RAxML's own default. -->
		<param name="search_algorithm" type="select" label="Algorithm to execute" optional="true">
			<option value="a" selected="true">Rapid bootstrap and best ML tree search (a)</option>
			<option value="A">Compute marginal ancestral states (A)</option>
			<option value="b">Draw bipartition information (b)</option>
			<option value="c">Check if the alignment can be read (c)</option>
			<option value="d">Hill-climbing ML Search (d) (RAxML default)</option>
			<option value="e">Optimize GAMMA/GAMMAI model/branches (e)</option>
			<option value="g">Compute per-site log likelihoods for -z trees (g)</option>
			<option value="h">Compute log likelihood test for -t / -z trees (h)</option>
			<option value="j">Generate bootstrapped alignment files (j)</option>
			<option value="J">Compute SH-like support values for the -t tree (J)</option>
			<option value="m">Compare bipartitions between -t and -z trees (m)</option>
			<option value="n">Compute log likelihood score for -z trees (n)</option>
			<option value="o">Use old slower search algorithm (o)</option>
			<option value="p">Stepwise MP addition of new sequences (p)</option>
			<option value="q">Fast quartet calculator (q)</option>
			<option value="r">Compute pairwise RF distances in -z trees (r)</option>
			<option value="s">Split a multi-gene alignment (s)</option>
			<option value="S">Compute site-specific placement bias (S)</option>
			<option value="t">Randomized tree searches on a fixed starting tree (t)</option>
			<option value="T">Final optimization of a ML tree from a bootstrap (T)</option>
			<option value="u">Morphological weight calibration using ML on a -t tree (u)</option>
			<option value="v">Classify environmental sequences (v)</option>
			<option value="w">Compute ELW-test on -z trees (w)</option>
			<option value="x">Compute GAMMA model pair-wise ML distances on a tree (x)</option>
			<option value="y">Classify environmental sequences into a reference tree (y)</option>
		</param>

		<!-- (-q) -->
		<param name="multiple_model" format="txt" type="data" label="Multiple model assignment to alignment partitions" optional="true" help="Specify the file name which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual." />

		<!-- (-x) -->
		<param name="rapid_bootstrap_random_seed" type="integer" value="12345" size="7" label="Rapid bootstrapping random seed" optional="true" help="Specify a random seed and turn on rapid bootstrapping. CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under the model of rate heterogeneity you specified via '-m' and not by default under CAT." />

		<!-- Which concatenated alignment, if any, to expose as a dataset. -->
		<param name="out" type="select" label="What format of file do you want for your output (concatenation of the sequences) ? ">
			<option value="nothing">No output</option>
			<option value="fasta">Fasta format</option>
			<option value="phylip">Phylip format</option>
			<option value="nexus">Nexus format</option>
		</param>

		<!-- Switches for the optional RAxML result datasets. -->
		<param name="raxml1" type="boolean" label="Do you want the output of RAxML : best tree ? " />
		<param name="raxml3" type="boolean" label="Do you want the output of RAxML : bi-partition ? " />
		<param name="raxml4" type="boolean" label="Do you want the output of RAxML : bootstrap ? " help="Only if the option 'rapid bootstrap' is chosen. When you don't want to choose your options, this output is accessible" />

	</inputs>

	<outputs>
		<!-- Always produced: receives the standard output of the concatenation script and of RAxML. -->
		<data name="output" label="Phylogeny" format="txt" />

		<!-- Concatenated protein alignment, exposed in the format chosen with "out". -->
		<data name="out_fasta_aa" label="Phylogeny_concatenation_fasta_aa" format="fasta" from_work_dir="02_Concatenation_aa.fas">
			<filter>format['format_run'] == 'proteic' and out == 'fasta'</filter>
		</data>
		<data name="out_phylip_aa" label="Phylogeny_concatenation_phylip_aa" format="phylip" from_work_dir="02_Concatenation_aa.phy">
			<filter>format['format_run'] == 'proteic' and out == 'phylip'</filter>
		</data>
		<data name="out_nexus_aa" label="Phylogeny_concatenation_nexus_aa" format="nexus" from_work_dir="02_Concatenation_aa.nex">
			<filter>format['format_run'] == 'proteic' and out == 'nexus'</filter>
		</data>

		<!-- Concatenated nucleotide alignment, exposed in the format chosen with "out". -->
		<data name="out_fasta_nuc" label="Phylogeny_concatenation_fasta_nuc" format="fasta" from_work_dir="03_Concatenation_nuc.fas">
			<filter>format['format_run'] == 'nucleic' and out == 'fasta'</filter>
		</data>
		<data name="out_phylip_nuc" label="Phylogeny_concatenation_phylip_nuc" format="phylip" from_work_dir="03_Concatenation_nuc.phy">
			<filter>format['format_run'] == 'nucleic' and out == 'phylip'</filter>
		</data>
		<data name="out_nexus_nuc" label="Phylogeny_concatenation_nexus_nuc" format="nexus" from_work_dir="03_Concatenation_nuc.nex">
			<filter>format['format_run'] == 'nucleic' and out == 'nexus'</filter>
		</data>

		<!-- RAxML result files of the run named "galaxy_run", each gated by its boolean input. -->
		<data name="out_raxml1" label="Phylogeny_RAxML_BestTree" format="nhx" from_work_dir="RAxML_bestTree.galaxy_run">
			<filter>raxml1 == True</filter>
		</data>
		<data name="out_raxml3" label="Phylogeny_RAxML_BiPartition" format="nhx" from_work_dir="RAxML_bipartitions.galaxy_run">
			<filter>raxml3 == True</filter>
		</data>
		<data name="out_raxml4" label="Phylogeny_RAxML_BootStrap" format="txt" from_work_dir="RAxML_bootstrap.galaxy_run">
			<filter>raxml4 == True</filter>
		</data>
	</outputs>

	<tests>
		<!-- Test 1: nucleic run, GTRGAMMA, algorithm "d", 100 runs; checks that three topologies appear in the bootstrap output. -->
		<test>
			<param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
			<param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" />
			<conditional name="format">
				<param name="format_run" value="nucleic" />
				<param name="base_model" value="GTRGAMMA" />
			</conditional>
			<param name="random_seed" value="1234567890" />
			<param name="number_of_runs" value="100" />
			<param name="number_of_runs_bootstop" value="" />
			<param name="search_algorithm" value="d" />
			<!-- <param name="multiple_model" value="" /> -->
			<param name="rapid_bootstrap_random_seed" value="123456789" />
			<param name="out" value="nothing" />
			<param name="raxml1" value="True" />
			<param name="raxml3" value="True" />
			<param name="raxml4" value="True" />
			<output name="out_raxml4">
				<assert_contents>
					<has_text text="((Pg,(Am,Th)),(Ph,Ap),Ac);" />
					<has_text text="((Th,(Pg,Am)),(Ph,Ap),Ac);" />
					<has_text text="((Ph,Ap),(Am,(Pg,Th)),Ac);" />
				</assert_contents>
			</output>
		</test>

		<!-- Test 2: same inputs with algorithm "a" and 100 runs; compares the best tree and the bipartitions with reference files. -->
		<test>
			<param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
			<param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" />
			<conditional name="format">
				<param name="format_run" value="nucleic" />
				<param name="base_model" value="GTRGAMMA" />
			</conditional>
			<param name="random_seed" value="1234567890" />
			<param name="number_of_runs" value="100" />
			<param name="number_of_runs_bootstop" value="" />
			<param name="search_algorithm" value="a" />
			<param name="rapid_bootstrap_random_seed" value="1234567890" />
			<param name="out" value="nothing" />
			<param name="raxml1" value="True" />
			<param name="raxml3" value="True" />
			<param name="raxml4" value="True" />
			<output name="out_raxml1" value="RAxML_bestTree.nwk" />
			<output name="out_raxml3" value="RAxML_bipartitions.nwk" />
		</test>

		<!-- Test 3: as test 2 but with the autoMR bootstopping criterion selected. -->
		<test>
			<param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
			<param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/orthogroup_17_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_147_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_183_sp3_sp3.fasta,input_from_CDS_Search/orthogroup_334_sp3_sp3.fasta" />
			<conditional name="format">
				<param name="format_run" value="nucleic" />
				<param name="base_model" value="GTRGAMMA" />
			</conditional>
			<param name="random_seed" value="1234567890" />
			<param name="number_of_runs" value="100" />
			<param name="number_of_runs_bootstop" value="autoMR" />
			<param name="search_algorithm" value="a" />
			<param name="rapid_bootstrap_random_seed" value="1234567890" />
			<param name="out" value="nothing" />
			<param name="raxml1" value="True" />
			<param name="raxml3" value="True" />
			<param name="raxml4" value="True" />
			<output name="out_raxml1" value="RAxML_bestTree_test3.nwk" />
			<output name="out_raxml3" value="RAxML_bipartitions_test3.nwk" />
		</test>
	</tests>

	<help>

@HELP_AUTHORS@

<![CDATA[

.. class:: infomark

**Authors**
RAxML was written by Alexandros Stamatakis.
The script was written by Eric Fontanillas.

.. class:: infomark

**Galaxy integration** by Julie Baffard and ABiMS team.

--------

**Description**

This tool takes files containing fasta sequences (from the CDS_Search tool in the AdaptSearch suite), concatenates them and runs RAxML to build a phylogeny.

.. class:: infomark

full RAxML manual here_

.. _here: https://sco.h-its.org/exelixis/resource/download/NewManual.pdf

--------

**Parameters**

- The sequence format can be chosen : **proteic** or **nucleic**

- Several RAxML parameters can be set :

    - Substitution model (-m) : Model of Binary (Morphological), Nucleotide, Multi-state, or Amino-Acid substitution
        Default : GTRGAMMA (nucleic), PROTCAT (proteic).

    - Matrix : AA substitution model (when proteic inputs)
        Default : DAYHOFF

    - random seed : Specifies a random number seed for the parsimony inferences. For all options/algorithms in RAxML that require some sort of randomization, this option must be specified. Make sure to pass different random number seeds to RAxML and not only 12345.
     
    - Number of runs (-N) : Specifies the number of alternative runs.         
        By default it's an integer of value 100.

    - Use bootstopping criteria for number of runs :
        If selected, overrides the number of runs and uses the bootstopping criterion instead.

    - Algorithm to execute (-f) : selects the algorithm that RAxML executes.
        Default : Rapid bootstrap and best ML tree search (-f a).

    - Multiple model assignment to alignment partitions (-q) : an optional parameter. Specifies the file which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual.
        This option allows you to specify the regions of your alignment for which an individual model of nucleotide substitution should be estimated. This will typically be useful to infer trees for long multi-gene alignments.

    - Rapid bootstrapping random seed (-x) : Specify an integer number (random seed) and turn on rapid bootstrapping.
        In addition to the best tree search.
        By default, this option is chosen.

--------

**Inputs**

    - Files from Filter Assemblies : a set of fasta files (one file per species), e.g. the outputs of the first tool of the AdaptSearch suite.
        Used to retrieve all the species names.

    - Alignment files without indels : a set of fasta files with aligned sequences (from the same species as the files given in the previous parameter), e.g. the outputs of the CDS_Search tool of the AdaptSearch suite.

--------

**Outputs**

This tool produces the following files :

    - Phylogeny :
        the general output. It gives the information about the concatenation (statistics) and the RAxML run.

    - Phylogeny_concatenation_fasta_aa :
        contains the sequences concatenated in fasta format when you choose the option proteic.

    - Phylogeny_concatenation_phylip_aa :
        contains the sequences concatenated in phylip format when you choose the option proteic.

    - Phylogeny_concatenation_nexus_aa :
        contains the sequences concatenated in nexus format when you choose the option proteic.

    - Phylogeny_concatenation_fasta_nuc :
        contains the sequences concatenated in fasta format when you choose the option nucleic.

    - Phylogeny_concatenation_phylip_nuc :
        contains the sequences concatenated in phylip format when you choose the option nucleic.
        it's this output which is used for the RAxML run.

    - Phylogeny_concatenation_nexus_nuc :
        contains the sequences concatenated in nexus format when you choose the option nucleic.

    - Phylogeny_RAxML_BestTree :
        the output of RAxML run which contains the Best Tree found.

    - Phylogeny_RAxML_BiPartitionBranchLabel :
        the output of RAxML run which contains the Best Tree found with supported values as branch labels.

    - Phylogeny_RAxML_BiPartition :
        the output of RAxML run which contains the Best Tree found with supported values.

    - Phylogeny_RAxML_BootStrap :
        the output of RAxML run which contains all the bootstrapped trees. The number of bootstrapped trees depends on the option -N (number of runs).

---------

**The AdaptSearch Pipeline**

.. image:: adaptsearch_picture_helps.png

---------

Changelog
---------

**Version 2.0 - 06/07/2017**

 - NEW: Replace the zip between tools by Dataset Collection

**Version 1.0 - 13/04/2017**

 - Add functional test with planemo
 - Planemo test with conda dependencies for raxml and python
 - Scripts renamed + symlinks to the directory 'scripts'

    ]]>

	</help>

	<expand macro="citations" />

</tool>