diff ConcatPhyl.xml @ 2:1f8d039bd241 draft

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author abims-sbr
date Wed, 27 Sep 2017 10:03:45 -0400
parents 6d930f037fea
children 0464ec48bc3a
line wrap: on
line diff
--- a/ConcatPhyl.xml	Thu Apr 13 09:48:47 2017 -0400
+++ b/ConcatPhyl.xml	Wed Sep 27 10:03:45 2017 -0400
@@ -1,4 +1,4 @@
-<tool name="ConcatPhyl" id="concatphyl" version="1.0">
+<tool name="ConcatPhyl" id="concatphyl" version="2.0">
 
 	<description>
 		Concatenation and phylogeny
@@ -10,96 +10,85 @@
 
 	<requirements>
 		<expand macro="python_required" />
-		<!-- <requirement type="package" version="1.3.1">samtools</requirement> -->
 		<requirement type="package" version="8.2.9">raxml</requirement>
 	</requirements>
 
   	<command><![CDATA[
-		python $__tool_directory__/scripts/S01_concatenate.py ${zip}
-
-		#if $format.format_run == "nucleic" :
-			nucleic $format.zip_nuc
-		#elif $format.format_run == "proteic" :
-			proteic $format.zip_aa
-		#end if
-		> ${output};
+        #set $infiles_filter_assemblies = ""
+        #for $input_filter_assemblie in $input_filter_assemblies
+            ln -s '$input_filter_assemblie' '$input_filter_assemblie.element_identifier';
+            #set $infiles_filter_assemblies = $infiles_filter_assemblies + $input_filter_assemblie.element_identifier + ","
+        #end for
+        #set $infiles_filter_assemblies = $infiles_filter_assemblies[:-1]
 
-		raxmlHPC 
-		#if $format.format_run == "nucleic" :			
-			-n "galaxy_run"
-			##-q "./05_partitions_gene_NUC"
-			-s "./03_Concatenation_nuc.phy"
-			## (-m)
-			-m $format.base_model
-		#elif $format.format_run == "proteic" :
-			-n "galaxy_run"
-			##-q "./06_partitions_gene_AA"
- 			-s "./02_Concatenation_aa.phy"
-			## (-m)
-			-m $format.base_model$format.aa_search_matrix
-		#end if
+        #set $infiles_alignments = ""
+        #for $input_alignment in $input_alignments
+            ln -s '$input_alignment' '$input_alignment.element_identifier';
+            #set $infiles_alignments = $infiles_alignments + $input_alignment.element_identifier + ","
+        #end for
+        #set $infiles_alignments = $infiles_alignments[:-1]
 
-		## --- Optional parameters ---
+        python '$__tool_directory__/scripts/S01_concatenate.py'
+        ## Positional arguments, in order: comma-separated names of the assembly symlinks, sequence type (nucleic|proteic), comma-separated names of the alignment symlinks. Quoted so identifiers containing spaces stay a single argument.
+        '$infiles_filter_assemblies'
 
-		##if $raxml_options.options == "yes" :
+        #if $format.format_run == "nucleic" :
+            nucleic
+        #elif $format.format_run == "proteic" :
+            proteic
+        #end if
 
-			## (-p)
-			#if $random_seed:
-	    		-p $random_seed
-			#else
-	    		-p 1234567890
-			#end if
+        '$infiles_alignments'
+        > '$output';
 
-			## (-N/#)
-	   	 	#if $number_of_runs:
-	        		-N $number_of_runs
-	   		#end if
-			#if $number_of_runs_bootstop:
-	    		-# $number_of_runs_bootstop
-			#end if
+        raxmlHPC -n galaxy_run
+        #if $format.format_run == "nucleic" :
+            ##-q 05_partitions_gene_NUC
+            -s "03_Concatenation_nuc.phy"
+            -m $format.base_model
+        #elif $format.format_run == "proteic" :
+            ##-q 06_partitions_gene_AA
+            -s 02_Concatenation_aa.phy
+            -m $format.base_model$format.aa_search_matrix
+        #end if
 
-			## (-f)
-			#if $search_algorithm:
-				-f $search_algorithm
-			#end if
+        -p $random_seed
+        
+        #if $number_of_runs !="" and $number_of_runs_bootstop =="":
+            -N $number_of_runs
+            -x $rapid_bootstrap_random_seed
+        #elif ($number_of_runs !="" and $number_of_runs_bootstop !="") or ($number_of_runs =="" and $number_of_runs_bootstop !=""):
+            -N $number_of_runs_bootstop
+            -x $rapid_bootstrap_random_seed
+        #end if
 
-			## (-x)
-			#if $rapid_bootstrap_random_seed:
-			-x $rapid_bootstrap_random_seed
-			#end if
-		##else :
+        -f $search_algorithm
 
-	 	##-N 100 -f a -x 12345
-
-		##end if				
-		>> ${output};
-	]]>
+        >> ${output};
+    ]]>
   	</command>
 
  	<inputs>
 
-		<param name="zip" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="Contains the files filter after the tool oase" />
+		<param name="input_filter_assemblies" type="data" format="fasta" multiple="true" label="Files from Filter assemblies" />
+        <param name="input_alignments" type="data" format="fasta" multiple="true" label="Aligned files without indels" help="nucleic or proteic format according to the analysis you want to do below"/>
+
 		<conditional name="format">
 			<param name="format_run" type="select" label="Which format do you want to use for this tool (concatenation and RAxML run) ? ">
 				<option value="nucleic">Nucleic format</option>
 				<option value="proteic">Proteic format</option>
 			</param>
 
-			<when value="nucleic">
-				<param name="zip_nuc" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="It must contain the aligned files without indels in NUCLEIC format" />
-				<!-- ## Nucleotide substitution models -->
+			<when value="nucleic">				
 		    	<param name="base_model" type="select" label="Substitution Model">
-		        	<option value="GTRCAT">GTRCAT</option> 
-		        	<option value="GTRCATI">GTRCATI</option> 
+		        	<option value="GTRCAT">GTRCAT</option>
+		        	<option value="GTRCATI">GTRCATI</option>
 		        	<option value="GTRGAMMA" selected="true">GTRGAMMA</option>
 		        	<option value="GTRGAMMAI">GTRGAMMAI</option>
 		   		</param>
 			</when>
 
-			<when value="proteic">
-				<param name="zip_aa" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="It must contain the aligned files without indels in PROTEIC format" />
-				<!-- ## Aminoacid substitution models -->
-				<!--<param name="aa_model_empirical_base_frequencies" type="boolean" checked="no" truevalue="F" falsevalue="X" display="checkboxes" label="Use empirical base frequencies in AA models." /> -->
+			<when value="proteic">			
 		    	<param name="base_model" type="select" label="Substitution Model (-m)">
 					<option value="PROTCAT" selected="true">PROTCAT</option>
 					<option value="PROTCATI">PROTCATI</option>
@@ -111,22 +100,10 @@
 					<option value="JTT">JTT</option>
 					<option value="WAG">WAG</option>
 					<option value="BLOSUM62">BLOSUM62</option>
-			    </param>	
+			    </param>
 			</when>
 		</conditional>
 
-<!-- <conditional name="raxml_options"> -->
-
-<!--
-<param name="options" type="select" label="Raxml advanced options">
-				<option value="yes">Yes</option>
-				<option value="no" select="true">No</option>
-</param>
-
--->
-
-<!-- <when value="yes"> -->
-
 		<param name="random_seed" type="integer" value="1234567890" size="12" label="Random seed used for the parsimony inferences" />
 
 		<!-- ## (-N/#) -->
@@ -149,11 +126,11 @@
 
 		<!-- ## (-f) -->
    		<param name="search_algorithm" type="select" label="Algorithm to execute" optional="True">
-            <option value="a">Rapid bootstrap and best ML tree search (a)</option>
+            <option value="a" selected="true">Rapid bootstrap and best ML tree search (a)</option>
             <option value="A">Compute marginal ancestral states (A)</option>
             <option value="b">Draw bipartition information (b)</option>
             <option value="c">Check if the alignment can be read (c)</option>
-            <option value="d" selected="true">Hill-climbing ML Search (d) (default)</option>
+            <option value="d">Hill-climbing ML Search (d) (default)</option>
             <option value="e">Optimize GAMMA/GAMMAI model/branches (e)</option>
             <option value="g">Compute per-site log likelihoods for -z trees (g)</option>
             <option value="h">Compute log likelihood test for -t / -z trees (h)</option>
@@ -180,19 +157,14 @@
 	  	<param name="multiple_model" format="txt" type="data" label="Multiple model assignment to alignment partitions" optional="True" help="Specify the file name which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual." />
 
 	 	<!-- ## (-x) -->
-         <param name="rapid_bootstrap_random_seed" type="integer" value='1234567890' size="7" label="Rapid bootstrapping random seed" optional="True" help="Specify a random seed and turn on rapid bootstrapping. CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under the model of rate heterogeneity you specified via '-m' and not by default under CAT." />
-<!-- </when> -->
-
-
-<!-- </conditional> -->
+         <param name="rapid_bootstrap_random_seed" type="integer" value='12345' size="7" label="Rapid bootstrapping random seed" optional="True" help="Specify a random seed and turn on rapid bootstrapping. CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under the model of rate heterogeneity you specified via '-m' and not by default under CAT." />
 
 		<param name="out" type="select" label="What format of file do you want for your output (concatenation of the sequences) ? ">
 			<option value="nothing">No output</option>
 			<option value="fasta">Fasta format</option>
-			<option value="phylip">Phylip format</option> 
+			<option value="phylip">Phylip format</option>
 			<option value="nexus">Nexus format</option>
 		</param>
-          <!-- -m GTRGAMMA -N 100 -f a -x 12345 -->
 		
 		<param name="raxml1" type="boolean" label="Do you want the output of RAxML : best tree ? " />
 		<param name="raxml3" type="boolean" label="Do you want the output of RAxML : bi-partition ? " />
@@ -230,11 +202,11 @@
 		<data name="out_raxml1" format="txt" label="Phylogeny_RAxML_BestTree" from_work_dir="RAxML_bestTree.galaxy_run">
 			<filter>raxml1 == True</filter>
 		</data>
-	
+
 		<data name="out_raxml3" format="txt" label="Phylogeny_RAxML_BiPartition" from_work_dir="RAxML_bipartitions.galaxy_run">
 			<filter>raxml3 == True</filter>
 		</data>
-		
+
 		<data name="out_raxml4" format="txt" label="Phylogeny_RAxML_BootStrap" from_work_dir="RAxML_bootstrap.galaxy_run">
 			<filter>raxml4 == True</filter>
 		</data>
@@ -242,10 +214,10 @@
 
 	<tests>
 		<test>
-			<param name="zip" ftype="zip" value="from_filter_oase.zip" />
-			<conditional name="format">
+            <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
+            <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/locus17_sp3_sp3.fasta,input_from_CDS_Search/locus147_sp3_sp3.fasta,input_from_CDS_Search/locus183_sp3_sp3.fasta,input_from_CDS_Search/locus334_sp3_sp3.fasta" />
+            <conditional name="format">
 				<param name="format_run" value="nucleic" />
-				<param name="zip_nuc" ftype="zip" value="test_05_output_CDS_Search_input_ConcatPhyl.zip" />
 				<param name="base_model" value="GTRGAMMA" />
 			</conditional>
 			<param name="random_seed" value="1234567890" />
@@ -253,34 +225,70 @@
 			<param name="number_of_runs_bootstop" value="" />
 			<param name="search_algorithm" value="d" />
 			<!-- <param name="multiple_model" value="" /> -->
-			<param name="rapid_bootstrap_random_seed" value="123456789" />	
-			<param name="out" value="nothing" />			
+			<param name="rapid_bootstrap_random_seed" value="123456789" />
+			<param name="out" value="nothing" />
 			<param name="raxml1" value="True" />
 			<param name="raxml3" value="True" />
-			<param name="raxml4" value="True" />			
+			<param name="raxml4" value="True" />
 			<output name="out_raxml4">
-				<assert_contents>					
-					<has_text text="(Ap,(((Pf,Ph),Pg),((Pu,Te),(Am,Th))),Ac);"/>
-					<has_text text="(Ap,(Ph,(Pg,((Pf,(Pu,Te)),(Am,Th)))),Ac);"/>
-					<has_text text="(Ap,(((Pu,Te),(Am,Th)),((Pf,Ph),Pg)),Ac);"/>
+				<assert_contents>
+					<has_text text="((Pg,(Am,Th)),(Ph,Ap),Ac);"/>
+					<has_text text="((Th,(Pg,Am)),(Ph,Ap),Ac);"/>
+					<has_text text="((Ph,Ap),(Am,(Pg,Th)),Ac);"/>
 				</assert_contents>
-			</output>				
-			
+			</output>
 		</test>
+
+        <test>
+            <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
+            <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/locus17_sp3_sp3.fasta,input_from_CDS_Search/locus147_sp3_sp3.fasta,input_from_CDS_Search/locus183_sp3_sp3.fasta,input_from_CDS_Search/locus334_sp3_sp3.fasta" />
+            <conditional name="format">
+                <param name="format_run" value="nucleic" />
+                <param name="base_model" value="GTRGAMMA" />
+            </conditional>
+            <param name="random_seed" value="1234567890" />
+            <param name="number_of_runs" value="100" />
+            <param name="number_of_runs_bootstop" value="" />
+            <param name="search_algorithm" value="a" />            
+            <param name="rapid_bootstrap_random_seed" value="1234567890" />
+            <param name="out" value="nothing" />
+            <param name="raxml1" value="True" />
+            <param name="raxml3" value="True" />
+            <param name="raxml4" value="True" />
+            <output name="out_raxml1" value="RAxML_bestTree"/>     
+            <output name="out_raxml3" value="RAxML_bipartitions"/>                
+        </test>
+        
+        <test>
+            <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" />
+            <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/locus17_sp3_sp3.fasta,input_from_CDS_Search/locus147_sp3_sp3.fasta,input_from_CDS_Search/locus183_sp3_sp3.fasta,input_from_CDS_Search/locus334_sp3_sp3.fasta" />
+            <conditional name="format">
+                <param name="format_run" value="nucleic" />
+                <param name="base_model" value="GTRGAMMA" />
+            </conditional>
+            <param name="random_seed" value="1234567890" />
+            <param name="number_of_runs" value="100" />
+            <param name="number_of_runs_bootstop" value="autoMR" />
+            <param name="search_algorithm" value="a" />            
+            <param name="rapid_bootstrap_random_seed" value="1234567890" />
+            <param name="out" value="nothing" />
+            <param name="raxml1" value="True" />
+            <param name="raxml3" value="True" />
+            <param name="raxml4" value="True" />
+            <output name="out_raxml1" value="RAxML_bestTree_test3"/>     
+            <output name="out_raxml3" value="RAxML_bipartitions_test3"/>                
+        </test>
 	</tests>
 
 	<help>
 
+@HELP_AUTHORS@
+
 ============
 What it does
 ============
 
-| This tool takes a zip file containing nucleic fasta sequence files and searches different homologous genes from pairwise comparisons. 
-| 
-|
-| The run RAxML was written by **Alexandros Stamatakis**.
-| The script was written by **Eric Fontanillas**.
-| The wrapper was written by **Julie Baffard**.
+| This tool takes a 'dataset collection list'  containing nucleic fasta sequence files and searches different homologous genes from pairwise comparisons.
 
 --------
 
@@ -289,38 +297,41 @@
 ==========
 
 | The choice of the format sequences is possible : **proteic** or **nucleic**
-| 
+|
 
-The choice of parameters for the RAxML run is possible : 
+The choice of parameters for the RAxML run is possible :
 
 **-m** :
-	| is the option for the choice of the substitution model. 
+	| is the option for the choice of the substitution model.
 	| By default it's GTRGAMMA.
-	| 
+	|
 
 **-N** :
 	| is the option for the choice of the number of run
 	| by default it's 100
-	| 
+	|
 
 **rapid bootstrapping** :
 	| is the option to have, in addition to the best tree search, the rapid bootstrapping
 	| this translates by : -x 12345 -f a
 	| by default, this option is choosen
-	| 
+	|
 
+.. class:: warningmark
+
+| RAxML has some incompatible parameters.
+| The search algorithm compatible with bootstrapping and giving a besttree file is the one set by default:
+| -f a
+|
+| The search algorithms compatible with bootstrapping and NOT giving a besttree file are:
+| -f d, -f o, -f t
+
 --------
 
 ======
 Inputs
 ======
 
-option **Select a zip file containing the input files** :
-
-| the input zip file must have the extension .ort.zip
-| At the beginning, when you upload your input, you have to change the extension .zip to .ort.zip
-
-
 --------
 
 =======
@@ -331,49 +342,49 @@
 
 **Phylogeny** :
 	| is the general output. It gives the information about the concatenation (statistics) and the RAxML run.
-	| 
+	|
 
 **Phylogeny_concatenation_fasta_aa** :
 	| is the output which contains the sequences concatenated in fasta format when you choose the option proteic
-	| 
+	|
 
 **Phylogeny_concatenation_phylip_aa** :
 	| is the output which contains the sequences concatenated in phylip format when you choose the option proteic
-	| 
+	|
 
 **Phylogeny_concatenation_nexus_aa** :
 	| is the output which contains the sequences concatenated in nexus format when you choose the option proteic
-	| 
+	|
 
 **Phylogeny_concatenation_fasta_nuc** :
 	| is the output which contains the sequences concatenated in fasta format when you choose the option nucleic
-	| 
+	|
 
 **Phylogeny_concatenation_phylip_nuc** :
 	| is the output which contains the sequences concatenated in phylip format when you choose the option nucleic
 	| it's this output which is used for the RAxML run
-	| 
+	|
 
 **Phylogeny_concatenation_nexus_nuc** :
 	| is the output which contains the sequences concatenated in nexus format when you choose the option nucleic
-	| 
+	|
 
 **Phylogeny_RAxML_BestTree** :
 	| is the output of RAxML run which contains the Best Tree found
-	| 
+	|
 
 **Phylogeny_RAxML_BiPartitionBranchLabel** :
 	| is the output of RAxML run which contains the Best Tree found with supported values as branch labels
-	| 
+	|
 
 **Phylogeny_RAxML_BiPartition** :
 	| is the output of RAxML run which contains the Best Tree found with supported values
-	| 
+	|
 
 **Phylogeny_RAxML_BootStrap** :
 	| is the output of RAxML run which contains all the boostrapped trees
 	| the number of boostraped trees depending of the option -N (number of run)
-	| 
+	|
 
 --------
 
@@ -387,12 +398,12 @@
 
 **Input files**
 	| 6 files with 200 nucleic sequences each
-	| a zip file containing 2 locus aligned without indel (in nucleic format)
-	| 
+	| a 'dataset collection list' containing 2 loci aligned without indels (in nucleic format)
+	|
 **Parameters**
 	| option : nucleic
 	| no option for the RAxML run, so by default it's : -m GTRGAMMA -N 100 -f a -x 12345
-	| 
+	|
 
 ----------------
 The output files
@@ -401,16 +412,16 @@
 **Phylogeny** :
 
 | ******************** CONCATENATION ********************
-| 
+|
 | Process nucleotides concatenation:
 | Number of taxa aligned = 6
 | Number of loci concatenated = 2
-| 
+|
 | Total length of the concatenated sequences [All codon positions] = 504
 | Total length of the concatenated sequences [Codon positions 1 and 2] = 336
 | Total length of the concatenated sequences [Codon position 3] = 168
-| 
-| 
+|
+|

 | ******************** RAxML RUN ********************

@@ -418,7 +429,7 @@
 the informations of the RAxML run
 

- 
+
 **Phylogeny_concatenation_fasta_nuc** :
 
 | &gt;Ps
@@ -458,7 +469,7 @@
 .. class:: infomark
 
 | If you choose the option proteic : you obtain a file with proteic sequences
-| 
+|

 
 
@@ -480,7 +491,7 @@
 | Pp
 | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 | ------------------------------------------------------------------------------------------------ataatccttgacgaccacacactgcatccaacaacttttctggccttgccttccttgtctattttacacaaaccagcccat
-| 
+|
 | Ap
 
 cgcagctcctcggtgacgcggtgcagctcggcggcgaggcgatcggctctctcctcggctgccctgcgggcgttcagggcctcatcaaggtcggcctgcatggcggcgatgtcgccctccatgcggcgcttgtcgccggtcagagtcgtcaccgtgatgttcagctcgttgacgcgagccgtggcgtcgtgtagctcgttctcggcattcttacgagctcgttcggc
@@ -500,7 +511,7 @@
 .. class:: infomark
 
 | If you choose the option proteic : you obtain a file with proteic sequences
-| 
+|

 
 **Phylogeny_concatenation_nexus_nuc** :
@@ -538,19 +549,19 @@
 
 | 	;
 | End;
-| 
+|
 
 .. class:: infomark
 
 | If you choose the option proteic : you obtain a file with proteic sequences
-| 
+|

 
 **Phylogeny_RAxML_BestTree** :
 
 | ((Ac:0.02889451913999640381,Ap:0.01674414484251282934):0.17730049470177636217,
 | ((Pp:0.23405795780876006984,Pg:0.02012322210145659623):0.14429203507314311561,Pf:0.09977363663005259231):0.04320803212100913365,Ps:0.08351583721596630983):0.0;
-| 
+|

 
 
@@ -558,8 +569,8 @@
 
 | (Pg:0.02012322210145659623,(Pf:0.09977363663005259231,(Ps:0.08351583721596630983,
 | (Ac:0.02889451913999640381,Ap:0.01674414484251282934):0.17730049470177636217[89]):0.04320803212100913365[42]):0.14429203507314311561[70],Pp:0.23405795780876006984);
-| 
-| 
+|
+|
 
 
 **Phylogeny_RAxML_BiPartition** :
@@ -567,8 +578,8 @@
 (Pg:0.02012322210145659623,(Pf:0.09977363663005259231,(Ps:0.08351583721596630983,
 (Ac:0.02889451913999640381,Ap:0.01674414484251282934)89:0.17730049470177636217)42:0.04320803212100913365)70:0.14429203507314311561,Pp:0.23405795780876006984);
 
-| 
-| 
+|
+|
 
 **Phylogeny_RAxML_BootStrap** :
 
@@ -582,6 +593,25 @@
 
 ...
 
+---------------------------------------------------
+
+Changelog
+---------
+
+**Version 2.0 - 06/07/2017**
+
+ - NEW: Replaced the zip archives exchanged between tools with Dataset Collections
+ - Corrected bug : output files were empty due to errors in the command section (incompatible parameters set by default instead of the ones mentioned in the help)
+
+
+**Version 1.0 - 13/04/2017**
+
+ - Add functional test with planemo
+
+ - Planemo test with conda dependencies for raxml and python
+
+ - Scripts renamed + symlinks to the directory 'scripts'
+
 	</help>
 
 	<expand macro="citations" />