changeset 0:8be5b7d52cae draft

Uploaded
author dcorreia
date Mon, 04 Jul 2016 11:06:15 -0400
parents
children 6a1a606be502
files phyml_sms.xml
diffstat 1 files changed, 217 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phyml_sms.xml	Mon Jul 04 11:06:15 2016 -0400
@@ -0,0 +1,217 @@
+<tool id="phyml-sms" name="PhyML-SMS" version="1.3.1">
+	<description>Maximum likelihood-based inference of phylogenetic trees with Smart Model Selection</description>
+	<requirements>
+		<requirement type="package" version="1.3">phyml-sms</requirement> <!-- package version is 1.3; the tool element itself declares version 1.3.1 -->
+	</requirements>
+	<command><![CDATA[
+		## Run SMS model selection, then let it infer a tree with PhyML (-t) using the selected model.
+		sms.sh
+		-i '$input'
+		-o .
+		-p '$output_models'
+		-t
+		## NOTE(review): with Data type Auto this passes -d auto and never reads input_info; the help text lists only aa or nt for -d. Confirm.
+		-d $sequence.seqtype
+		-c $stat_crit
+		#if $inpuTree.inputtree == "true" :
+		    -u '$inpuTree.userInpuTree'
+		#end if
+		-s $move
+		## Branch support codes: -4 = SH-like aLRT, -5 = aBayes, 0 = none, positive integer = bootstrap replicates.
+		#if $support_condition.support == "sh":
+		    -b -4
+		#else if $support_condition.support == "aBayes":
+		    -b -5
+		#else if $support_condition.support == "no":
+		    -b 0
+		#else if $support_condition.support == "boot":
+		    -b $support_condition.boot_number
+		#end if
+		## Random starting trees are only passed on when the SPR search is selected.
+		#if $randstart.value != 0 and $move.value == "SPR" :
+		    -r $randstart
+		#end if
+		> '$output_stdout'
+		## Chain with a logical AND so a failed sms.sh run is not masked by the exit status of a later mv.
+		&& mv *_phyml_tree.txt '$output_tree'
+		&& mv *_phyml_stats.txt '$output_stats'
+		]]>
+	</command>
+	<inputs>
+		<param name="input" type="data" format="phylip" label="Alignment file" help="phylip format"/>
+		<conditional name="sequence"> <!-- the selected seqtype value is passed verbatim to sms.sh as -d -->
+			<param name="seqtype" type="select" label="Data type">
+				<option value="nt">Nucleic acids</option>
+				<option value="aa">Amino acids</option>
+				<option value="auto">Auto</option>
+			</param>
+			<when value="nt"/>
+			<when value="aa"/>
+			<when value="auto"> <!-- NOTE(review): input_info is not referenced by the command template; "-d auto" is passed as-is. Confirm intended behaviour. -->
+				<param name="input_info" type="data" format="txt" multiple="false" label="sequence type info"
+					help="Precomputed file containing sequence description (dna or protein)"/>
+			</when>
+		</conditional>
+		<param name="stat_crit" type="select" label="Statistical criterion to select the model">
+			<option value="aic">AIC</option>
+			<option value="bic">BIC</option>
+		</param>
+		<param name="move" type="select" display="radio" label="Tree topology search">
+			<option value="NNI">NNI (Nearest Neighbor Interchange)</option>
+			<option value="SPR">SPR (Subtree Pruning and Regrafting)</option>
+		</param>
+		<conditional name="support_condition"> <!-- mapped to the -b option by the command template -->
+			<param name="support" type="select" label="Branch support" help="Use aLRT or aBayes to save computing time">
+				<option value="sh">SH-like aLRT</option>
+				<option value="aBayes">aBayes</option>
+				<option value="boot">Bootstrap</option>
+				<option value="no">No branch support</option>
+			</param>
+			<when value="sh"/>
+			<when value="aBayes"/>
+			<when value="boot">
+				<param name="boot_number" type="integer" min="1" value="100" label="Number of bootstrap replicates" help="Must be a positive integer"/>
+			</when>
+			<when value="no"/>
+		</conditional>
+		<conditional name="inpuTree"> <!-- when enabled, the chosen tree file is passed as -u -->
+			<param name="inputtree" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use input starting tree"/>
+			<when value="true">
+				<param name="userInpuTree" type="data" label="Tree file" help="newick format"/>
+			</when>
+			<when value="false"/>
+		</conditional>
+		<param name="randstart" type="integer" value="0" min="0" max="10" label="Number of random starting trees" help="Only used when the tree topology search is SPR"/>
+	</inputs>
+	<outputs>
+		<data name="output_tree" format="nhx" label="PhyML Newick tree"/> <!-- receives the *_phyml_tree.txt file moved by the command -->
+		<data name="output_stats" format="txt" label="PhyML Statistics"/> <!-- receives the *_phyml_stats.txt file moved by the command -->
+		<data name="output_stdout" format="txt" label="SMS Best Model"/> <!-- captures the standard output of sms.sh -->
+		<data name="output_models" format="txt" label="SMS compare models"/> <!-- path handed to sms.sh through -p -->
+	</outputs>
+	<tests>
+		<test>
+			<!-- Nucleotide run; only the output_models dataset is compared (against sms.csv). -->
+			<param name="input" value="phylip"/>
+			<param name="seqtype" value="nt"/>
+			<output name="output_models" file="sms.csv"/>
+		</test>
+	</tests>
+	<help><![CDATA[
+
+.. class:: infomark
+
+This script runs SMS to select the substitution model which best fits the input data.
+It may also run PhyML with the selected model.
+SMS options :
+	 -h = Help
+	 -i = **Mandatory** Input alignment file in PHYLIP format
+	 -d = **Mandatory** Data type : 'aa' or 'nt'
+	 -o = Output directory
+	 -p = Output CSV filename
+	 -c = Statistical criterion to select the model : 'AIC' (default) or 'BIC'
+	 -u = Input starting tree (Newick format)
+	 -t = Use this option if you want SMS to infer a tree with PhyML using the SMS selected model
+ PhyML options :
+	 -s = Type of tree improvement : 'NNI (default)' or 'SPR'
+	 -r = Number of random starting trees : 0 (default)
+	 -b = Branch Support : >0 for bootstrap replicates, -4 for SH-like aLRT, -5 for aBayes, 0 for no branch support (default)
+
+**PhyML 20120412**
+
+-----
+
+
+===========
+ Overview:
+===========
+
+PhyML is a phylogeny software based on the maximum-likelihood principle. Early PhyML versions used a fast algorithm to perform Nearest Neighbor Interchanges (NNIs), in order to improve a reasonable starting tree topology. Since the original publication (Guindon and Gascuel 2003), PhyML has been widely used due to its simplicity and a fair accuracy/speed compromise. In the meantime, research around PhyML has continued.
+
+We designed an efficient algorithm to search the tree space using Subtree Pruning and Regrafting (SPR) topological moves (Hordijk and Gascuel 2005), and proposed a fast branch test based on an approximate likelihood ratio test (Anisimova and Gascuel 2006). However, these novelties were not included in the official version of PhyML, and we found that improvements were still needed in order to make them effective in some practical cases. PhyML 3.0 achieves this task. 
+
+It implements new algorithms to search the space of tree topologies with user-defined intensity. A non-parametric, Shimodaira-Hasegawa-like branch test is also available. The program provides a number of new evolutionary models and its interface was entirely re-designed. We tested PhyML 3.0 on a large collection of real data sets to ensure that the new version is stable, ready-to-use and still reasonably fast and accurate. 
+
+-----
+
+For further information, please visit the PhyML_ and SMS_ websites.
+
+
+.. _PhyML: http://www.atgc-montpellier.fr/phyml/
+.. _SMS: http://www.atgc-montpellier.fr/phyml-sms/
+
+
+Models References:
+
+- Dayhoff : Dayhoff, M., Schwartz, R. & Orcutt, B.
+    A model of evolutionary change in proteins.
+    In Dayhoff, M. (ed.) Atlas of Protein Sequence and Structure, vol. 5, 345–352 (National Biomedical Research Foundation, Washington, D. C., 1978)
+
+- JTT : Jones, D., Taylor, W. & Thornton, J.
+    The rapid generation of mutation data matrices from protein sequences.
+    Computer Applications in the Biosciences (CABIOS) 8, 275–282 (1992).
+
+- Blosum62 : Henikoff, S. & Henikoff, J.
+    Amino acid substitution matrices from protein blocks.
+    Proceedings of the National Academy of Sciences of the United States of America (PNAS) 89, 10915–10919 (1992).
+
+- MtREV : Adachi, J. & Hasegawa, M.
+    MOLPHY version 2.3. programs for molecular phylogenetics based on maximum likelihood.
+    In Ishiguro, M. et al. (eds.) Computer Science Monographs, 28 (The Institute of Statistical Mathematics, Tokyo, 1996).
+
+- MtMam : Cao, Y., A. Janke, P. J. Waddell, M. Westerman, O. Takenaka, S. Murata, N. Okada, S. Paabo, and M. Hasegawa
+    Conflict among individual mitochondrial proteins in resolving the phylogeny of eutherian orders.
+    Journal of Molecular Evolution 47, 307–322 (1998).
+
+- CpREV : Adachi, J., P., W., Martin, W. & Hasegawa, M.
+    Plastid genome phylogeny and a model of amino acid substitution for proteins encoded by chloroplast DNA.
+    Journal of Molecular Evolution 50, 348–358 (2000).
+
+- VT : Muller, T. & Vingron, M.
+    Modeling amino acid replacement.
+    Journal of Computational Biology 7, 761–776 (2000).
+
+- WAG : Whelan, S. & Goldman, N.
+    A general empirical model of protein evolution derived from multiple protein families using a maximum-likelihood approach.
+    Molecular Biology and Evolution 18, 691–699 (2001).
+
+- RtREV : Dimmic, M., Rest, J., Mindell, D. & Goldstein, D.
+    rtREV : an amino acid substitution matrix for inference of retrovirus and reverse transcriptase phylogeny.
+    Journal of Molecular Evolution 55, 65–73 (2002).
+
+- DCMut : Kosiol, C. & Goldman, N.
+    Different versions of the Dayhoff rate matrix.
+    Molecular Biology and Evolution 22, 193–199 (2004).
+
+- MtArt : Abascal F, Posada D, Zardoya R.
+    MtArt: a new model of amino acid replacement for Arthropoda.
+    Mol Biol Evol. 2007 Jan;24(1):1-5. Epub 2006 Oct 16.
+
+- HIVb - HIVw : Nickle DC, Heath L, Jensen MA, Gilbert PB, Mullins JI, Kosakovsky Pond SL.
+    HIV-Specific Probabilistic Models of Protein Evolution.
+    PLoS ONE. 2007 Jun 6;2:e503.
+
+- LG : Le, S. & Gascuel, O.
+    An improved general amino-acid replacement matrix.
+    Mol. Biol. Evol. 25, 1307–1320 (2008)
+
+- MtZOA : Rota-Stabelli O, Yang Z, Telford MJ
+    MtZoa: A general mitochondrial amino acid substitutions model for animal evolutionary studies
+    Molecular Phylogenetics and Evolution 52 (2009) 268–272
+
+- FLU : Cuong Cao Dang, Quang Si Le, Olivier Gascuel and Vinh Sy Le
+    FLU, an amino acid substitution model for influenza proteins.
+    BMC Evolutionary Biology 2010 Apr 12;10:99
+
+- AB :
+    Alexander Mirsky, Linda Kazandjian and Maria Anisimova
+    Antibody-specific Model of Amino Acid Substitution for Immunological Inferences from Alignments of Antibody Sequences
+    Mol Biol Evol (2014) doi: 10.1093/molbev/msu340
+		]]>
+	</help>
+	<citations>
+		<citation type="doi">10.1093/sysbio/syq010</citation>
+		<citation type="doi">10.1093/nar/gki352</citation>
+		<citation type="doi">10.1080/10635150390235520</citation>
+	</citations>
+</tool>