Mercurial > repos > abims-sbr > concatphyl
diff ConcatPhyl.xml @ 2:1f8d039bd241 draft
planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 44a89d5eeb82789bfc643b33c11f391281b6374b
author | abims-sbr |
---|---|
date | Wed, 27 Sep 2017 10:03:45 -0400 |
parents | 6d930f037fea |
children | 0464ec48bc3a |
line wrap: on
line diff
--- a/ConcatPhyl.xml Thu Apr 13 09:48:47 2017 -0400 +++ b/ConcatPhyl.xml Wed Sep 27 10:03:45 2017 -0400 @@ -1,4 +1,4 @@ -<tool name="ConcatPhyl" id="concatphyl" version="1.0"> +<tool name="ConcatPhyl" id="concatphyl" version="2.0"> <description> Concatenation and phylogeny @@ -10,96 +10,85 @@ <requirements> <expand macro="python_required" /> - <!-- <requirement type="package" version="1.3.1">samtools</requirement> --> <requirement type="package" version="8.2.9">raxml</requirement> </requirements> <command><![CDATA[ - python $__tool_directory__/scripts/S01_concatenate.py ${zip} - - #if $format.format_run == "nucleic" : - nucleic $format.zip_nuc - #elif $format.format_run == "proteic" : - proteic $format.zip_aa - #end if - > ${output}; + #set $infiles_filter_assemblies = "" + #for $input_filter_assemblie in $input_filter_assemblies + ln -s '$input_filter_assemblie' '$input_filter_assemblie.element_identifier'; + #set $infiles_filter_assemblies = $infiles_filter_assemblies + $input_filter_assemblie.element_identifier + "," + #end for + #set $infiles_filter_assemblies = $infiles_filter_assemblies[:-1] - raxmlHPC - #if $format.format_run == "nucleic" : - -n "galaxy_run" - ##-q "./05_partitions_gene_NUC" - -s "./03_Concatenation_nuc.phy" - ## (-m) - -m $format.base_model - #elif $format.format_run == "proteic" : - -n "galaxy_run" - ##-q "./06_partitions_gene_AA" - -s "./02_Concatenation_aa.phy" - ## (-m) - -m $format.base_model$format.aa_search_matrix - #end if + #set $infiles_alignments = "" + #for $input_alignment in $input_alignments + ln -s '$input_alignment' '$input_alignment.element_identifier'; + #set $infiles_alignments = $infiles_alignments + $input_alignment.element_identifier + "," + #end for + #set $infiles_alignments = $infiles_alignments[:-1] - ## --- Optional parameters --- + python $__tool_directory__/scripts/S01_concatenate.py + + $infiles_filter_assemblies - ##if $raxml_options.options == "yes" : + #if $format.format_run == "nucleic" : + nucleic + #elif 
$format.format_run == "proteic" : + proteic + #end if - ## (-p) - #if $random_seed: - -p $random_seed - #else - -p 1234567890 - #end if + $infiles_alignments + > ${output}; - ## (-N/#) - #if $number_of_runs: - -N $number_of_runs - #end if - #if $number_of_runs_bootstop: - -# $number_of_runs_bootstop - #end if + raxmlHPC -n galaxy_run + #if $format.format_run == "nucleic" : + ##-q 05_partitions_gene_NUC + -s "03_Concatenation_nuc.phy" + -m $format.base_model + #elif $format.format_run == "proteic" : + ##-q 06_partitions_gene_AA + -s 02_Concatenation_aa.phy + -m $format.base_model$format.aa_search_matrix + #end if - ## (-f) - #if $search_algorithm: - -f $search_algorithm - #end if + -p $random_seed + + #if $number_of_runs !="" and $number_of_runs_bootstop =="": + -N $number_of_runs + -x $rapid_bootstrap_random_seed + #elif ($number_of_runs !="" and $number_of_runs_bootstop !="") or ($number_of_runs =="" and $number_of_runs_bootstop !=""): + -N $number_of_runs_bootstop + -x $rapid_bootstrap_random_seed + #end if - ## (-x) - #if $rapid_bootstrap_random_seed: - -x $rapid_bootstrap_random_seed - #end if - ##else : + -f $search_algorithm - ##-N 100 -f a -x 12345 - - ##end if - >> ${output}; - ]]> + >> ${output}; + ]]> </command> <inputs> - <param name="zip" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="Contains the files filter after the tool oase" /> + <param name="input_filter_assemblies" type="data" format="fasta" multiple="true" label="Files from Filter assemblies" /> + <param name="input_alignments" type="data" format="fasta" multiple="true" label="Aligned files without indels" help="nucleic or proteic format according to the analysis you want to do below"/> + <conditional name="format"> <param name="format_run" type="select" label="Which format do you want to use for this tool (concatenation and RAxML run) ? 
"> <option value="nucleic">Nucleic format</option> <option value="proteic">Proteic format</option> </param> - <when value="nucleic"> - <param name="zip_nuc" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="It must contain the aligned files without indels in NUCLEIC format" /> - <!-- ## Nucleotide substitution models --> + <when value="nucleic"> <param name="base_model" type="select" label="Substitution Model"> - <option value="GTRCAT">GTRCAT</option> - <option value="GTRCATI">GTRCATI</option> + <option value="GTRCAT">GTRCAT</option> + <option value="GTRCATI">GTRCATI</option> <option value="GTRGAMMA" selected="true">GTRGAMMA</option> <option value="GTRGAMMAI">GTRGAMMAI</option> </param> </when> - <when value="proteic"> - <param name="zip_aa" type="data" format="no_unzip.zip,zip" label="Choose your ZIP file" help="It must contain the aligned files without indels in PROTEIC format" /> - <!-- ## Aminoacid substitution models --> - <!--<param name="aa_model_empirical_base_frequencies" type="boolean" checked="no" truevalue="F" falsevalue="X" display="checkboxes" label="Use empirical base frequencies in AA models." 
/> --> + <when value="proteic"> <param name="base_model" type="select" label="Substitution Model (-m)"> <option value="PROTCAT" selected="true">PROTCAT</option> <option value="PROTCATI">PROTCATI</option> @@ -111,22 +100,10 @@ <option value="JTT">JTT</option> <option value="WAG">WAG</option> <option value="BLOSUM62">BLOSUM62</option> - </param> + </param> </when> </conditional> -<!-- <conditional name="raxml_options"> --> - -<!-- -<param name="options" type="select" label="Raxml advanced options"> - <option value="yes">Yes</option> - <option value="no" select="true">No</option> -</param> - ---> - -<!-- <when value="yes"> --> - <param name="random_seed" type="integer" value="1234567890" size="12" label="Random seed used for the parsimony inferences" /> <!-- ## (-N/#) --> @@ -149,11 +126,11 @@ <!-- ## (-f) --> <param name="search_algorithm" type="select" label="Algorithm to execute" optional="True"> - <option value="a">Rapid bootstrap and best ML tree search (a)</option> + <option value="a" selected="true">Rapid bootstrap and best ML tree search (a)</option> <option value="A">Compute marginal ancestral states (A)</option> <option value="b">Draw bipartition information (b)</option> <option value="c">Check if the alignment can be read (c)</option> - <option value="d" selected="true">Hill-climbing ML Search (d) (default)</option> + <option value="d">Hill-climbing ML Search (d) (default)</option> <option value="e">Optimize GAMMA/GAMMAI model/branches (e)</option> <option value="g">Compute per-site log likelihoods for -z trees (g)</option> <option value="h">Compute log likelihood test for -t / -z trees (h)</option> @@ -180,19 +157,14 @@ <param name="multiple_model" format="txt" type="data" label="Multiple model assignment to alignment partitions" optional="True" help="Specify the file name which contains the assignment of models to alignment partitions for multiple models of substitution. For the syntax of this file please consult the manual." 
/> <!-- ## (-x) --> - <param name="rapid_bootstrap_random_seed" type="integer" value='1234567890' size="7" label="Rapid bootstrapping random seed" optional="True" help="Specify a random seed and turn on rapid bootstrapping. CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under the model of rate heterogeneity you specified via '-m' and not by default under CAT." /> -<!-- </when> --> - - -<!-- </conditional> --> + <param name="rapid_bootstrap_random_seed" type="integer" value='12345' size="7" label="Rapid bootstrapping random seed" optional="True" help="Specify a random seed and turn on rapid bootstrapping. CAUTION: unlike in version 7.0.4 RAxML will conduct rapid BS replicates under the model of rate heterogeneity you specified via '-m' and not by default under CAT." /> <param name="out" type="select" label="What format of file do you want for your output (concatenation of the sequences) ? "> <option value="nothing">No output</option> <option value="fasta">Fasta format</option> - <option value="phylip">Phylip format</option> + <option value="phylip">Phylip format</option> <option value="nexus">Nexus format</option> </param> - <!-- -m GTRGAMMA -N 100 -f a -x 12345 --> <param name="raxml1" type="boolean" label="Do you want the output of RAxML : best tree ? " /> <param name="raxml3" type="boolean" label="Do you want the output of RAxML : bi-partition ? 
" /> @@ -230,11 +202,11 @@ <data name="out_raxml1" format="txt" label="Phylogeny_RAxML_BestTree" from_work_dir="RAxML_bestTree.galaxy_run"> <filter>raxml1 == True</filter> </data> - + <data name="out_raxml3" format="txt" label="Phylogeny_RAxML_BiPartition" from_work_dir="RAxML_bipartitions.galaxy_run"> <filter>raxml3 == True</filter> </data> - + <data name="out_raxml4" format="txt" label="Phylogeny_RAxML_BootStrap" from_work_dir="RAxML_bootstrap.galaxy_run"> <filter>raxml4 == True</filter> </data> @@ -242,10 +214,10 @@ <tests> <test> - <param name="zip" ftype="zip" value="from_filter_oase.zip" /> - <conditional name="format"> + <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" /> + <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/locus17_sp3_sp3.fasta,input_from_CDS_Search/locus147_sp3_sp3.fasta,input_from_CDS_Search/locus183_sp3_sp3.fasta,input_from_CDS_Search/locus334_sp3_sp3.fasta" /> + <conditional name="format"> <param name="format_run" value="nucleic" /> - <param name="zip_nuc" ftype="zip" value="test_05_output_CDS_Search_input_ConcatPhyl.zip" /> <param name="base_model" value="GTRGAMMA" /> </conditional> <param name="random_seed" value="1234567890" /> @@ -253,34 +225,70 @@ <param name="number_of_runs_bootstop" value="" /> <param name="search_algorithm" value="d" /> <!-- <param name="multiple_model" value="" /> --> - <param name="rapid_bootstrap_random_seed" value="123456789" /> - <param name="out" value="nothing" /> + <param name="rapid_bootstrap_random_seed" value="123456789" /> + <param name="out" value="nothing" /> <param name="raxml1" value="True" /> <param name="raxml3" value="True" /> - <param name="raxml4" value="True" /> + <param name="raxml4" 
value="True" /> <output name="out_raxml4"> - <assert_contents> - <has_text text="(Ap,(((Pf,Ph),Pg),((Pu,Te),(Am,Th))),Ac);"/> - <has_text text="(Ap,(Ph,(Pg,((Pf,(Pu,Te)),(Am,Th)))),Ac);"/> - <has_text text="(Ap,(((Pu,Te),(Am,Th)),((Pf,Ph),Pg)),Ac);"/> + <assert_contents> + <has_text text="((Pg,(Am,Th)),(Ph,Ap),Ac);"/> + <has_text text="((Th,(Pg,Am)),(Ph,Ap),Ac);"/> + <has_text text="((Ph,Ap),(Am,(Pg,Th)),Ac);"/> </assert_contents> - </output> - + </output> </test> + + <test> + <param name="input_filter_assemblies" ftype="fasta" value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" /> + <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/locus17_sp3_sp3.fasta,input_from_CDS_Search/locus147_sp3_sp3.fasta,input_from_CDS_Search/locus183_sp3_sp3.fasta,input_from_CDS_Search/locus334_sp3_sp3.fasta" /> + <conditional name="format"> + <param name="format_run" value="nucleic" /> + <param name="base_model" value="GTRGAMMA" /> + </conditional> + <param name="random_seed" value="1234567890" /> + <param name="number_of_runs" value="100" /> + <param name="number_of_runs_bootstop" value="" /> + <param name="search_algorithm" value="a" /> + <param name="rapid_bootstrap_random_seed" value="1234567890" /> + <param name="out" value="nothing" /> + <param name="raxml1" value="True" /> + <param name="raxml3" value="True" /> + <param name="raxml4" value="True" /> + <output name="out_raxml1" value="RAxML_bestTree"/> + <output name="out_raxml3" value="RAxML_bipartitions"/> + </test> + + <test> + <param name="input_filter_assemblies" ftype="fasta" 
value="input_filter_assemblies/AcAcaud_trinity.fasta,input_filter_assemblies/AmAmphi_trinity.fasta,input_filter_assemblies/ApApomp_trinity.fasta,input_filter_assemblies/PgPgras_trinity.fasta,input_filter_assemblies/PhPhess_trinity.fasta,input_filter_assemblies/ThThelep_trinity.fasta" /> + <param name="input_alignments" ftype="fasta" value="input_from_CDS_Search/locus17_sp3_sp3.fasta,input_from_CDS_Search/locus147_sp3_sp3.fasta,input_from_CDS_Search/locus183_sp3_sp3.fasta,input_from_CDS_Search/locus334_sp3_sp3.fasta" /> + <conditional name="format"> + <param name="format_run" value="nucleic" /> + <param name="base_model" value="GTRGAMMA" /> + </conditional> + <param name="random_seed" value="1234567890" /> + <param name="number_of_runs" value="100" /> + <param name="number_of_runs_bootstop" value="autoMR" /> + <param name="search_algorithm" value="a" /> + <param name="rapid_bootstrap_random_seed" value="1234567890" /> + <param name="out" value="nothing" /> + <param name="raxml1" value="True" /> + <param name="raxml3" value="True" /> + <param name="raxml4" value="True" /> + <output name="out_raxml1" value="RAxML_bestTree_test3"/> + <output name="out_raxml3" value="RAxML_bipartitions_test3"/> + </test> </tests> <help> +@HELP_AUTHORS@ + ============ What it does ============ -| This tool takes a zip file containing nucleic fasta sequence files and searches different homologous genes from pairwise comparisons. -| -| -| The run RAxML was written by **Alexandros Stamatakis**. -| The script was written by **Eric Fontanillas**. -| The wrapper was written by **Julie Baffard**. +| This tool takes a 'dataset collection list' containing nucleic fasta sequence files and searches different homologous genes from pairwise comparisons. 
-------- @@ -289,38 +297,41 @@ ========== | The choice of the format sequences is possible : **proteic** or **nucleic** -| +| -The choice of parameters for the RAxML run is possible : +The choice of parameters for the RAxML run is possible : **-m** : - | is the option for the choice of the substitution model. + | is the option for the choice of the substitution model. | By default it's GTRGAMMA. - | + | **-N** : | is the option for the choice of the number of runs | by default it's 100 - | + | **rapid bootstrapping** : | is the option to have, in addition to the best tree search, the rapid bootstrapping | this translates to : -x 12345 -f a | by default, this option is chosen - | + | +.. class:: warningmark +| RAxML has some incompatible parameters. +| The search algorithm compatible with bootstrapping and giving a besttree file is the one set by default: +| -f a + +| The search algorithms compatible with bootstrapping and NOT giving a besttree file are: +| -f d +| -f o +| -f t -------- ====== Inputs ====== -option **Select a zip file containing the input files** : - -| the input zip file must have the extension .ort.zip -| At the beginning, when you upload your input, you have to change the extension .zip to .ort.zip - - -------- ======= @@ -331,49 +342,49 @@ **Phylogeny** : | is the general output. It gives the information about the concatenation (statistics) and the RAxML run. 
- | + | **Phylogeny_concatenation_fasta_aa** : | is the output which contains the sequences concatenated in fasta format when you choose the option proteic - | + | **Phylogeny_concatenation_phylip_aa** : | is the output which contains the sequences concatenated in phylip format when you choose the option proteic - | + | **Phylogeny_concatenation_nexus_aa** : | is the output which contains the sequences concatenated in nexus format when you choose the option proteic - | + | **Phylogeny_concatenation_fasta_nuc** : | is the output which contains the sequences concatenated in fasta format when you choose the option nucleic - | + | **Phylogeny_concatenation_phylip_nuc** : | is the output which contains the sequences concatenated in phylip format when you choose the option nucleic | it's this output which is used for the RAxML run - | + | **Phylogeny_concatenation_nexus_nuc** : | is the output which contains the sequences concatenated in nexus format when you choose the option nucleic - | + | **Phylogeny_RAxML_BestTree** : | is the output of RAxML run which contains the Best Tree found - | + | **Phylogeny_RAxML_BiPartitionBranchLabel** : | is the output of RAxML run which contains the Best Tree found with supported values as branch labels - | + | **Phylogeny_RAxML_BiPartition** : | is the output of RAxML run which contains the Best Tree found with supported values - | + | **Phylogeny_RAxML_BootStrap** : | is the output of RAxML run which contains all the bootstrapped trees | the number of bootstrapped trees depends on the option -N (number of runs) - | + | -------- @@ -387,12 +398,12 @@ **Input files** | 6 files with 200 nucleic sequences each - | a zip file containing 2 locus aligned without indel (in nucleic format) - | + | a 'dataset collection list' containing 2 loci aligned without indel (in nucleic format) + | **Parameters** | option : nucleic | no option for the RAxML run, so by default it's : -m GTRGAMMA -N 100 -f a -x 12345 - | + | ---------------- The output 
files @@ -401,16 +412,16 @@ **Phylogeny** : | ******************** CONCATENATION ******************** -| +| | Process nucleotides concatenation: | Number of taxa aligned = 6 | Number of loci concatenated = 2 -| +| | Total length of the concatenated sequences [All codon positions] = 504 | Total length of the concatenated sequences [Codon positions 1 and 2] = 336 | Total length of the concatenated sequences [Codon position 3] = 168 -| -| +| +| | | ******************** RAxML RUN ******************** | @@ -418,7 +429,7 @@ the informations of the RAxML run | - + **Phylogeny_concatenation_fasta_nuc** : | >Ps @@ -458,7 +469,7 @@ .. class:: infomark | If you choose the option proteic : you obtain a file with proteic sequences -| +| | @@ -480,7 +491,7 @@ | Pp | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------ataatccttgacgaccacacactgcatccaacaacttttctggccttgccttccttgtctattttacacaaaccagcccat -| +| | Ap cgcagctcctcggtgacgcggtgcagctcggcggcgaggcgatcggctctctcctcggctgccctgcgggcgttcagggcctcatcaaggtcggcctgcatggcggcgatgtcgccctccatgcggcgcttgtcgccggtcagagtcgtcaccgtgatgttcagctcgttgacgcgagccgtggcgtcgtgtagctcgttctcggcattcttacgagctcgttcggc @@ -500,7 +511,7 @@ .. class:: infomark | If you choose the option proteic : you obtain a file with proteic sequences -| +| | **Phylogeny_concatenation_nexus_nuc** : @@ -538,19 +549,19 @@ | ; | End; -| +| .. 
class:: infomark | If you choose the option proteic : you obtain a file with proteic sequences -| +| | **Phylogeny_RAxML_BestTree** : | ((Ac:0.02889451913999640381,Ap:0.01674414484251282934):0.17730049470177636217, | ((Pp:0.23405795780876006984,Pg:0.02012322210145659623):0.14429203507314311561,Pf:0.09977363663005259231):0.04320803212100913365,Ps:0.08351583721596630983):0.0; -| +| | @@ -558,8 +569,8 @@ | (Pg:0.02012322210145659623,(Pf:0.09977363663005259231,(Ps:0.08351583721596630983, | (Ac:0.02889451913999640381,Ap:0.01674414484251282934):0.17730049470177636217[89]):0.04320803212100913365[42]):0.14429203507314311561[70],Pp:0.23405795780876006984); -| -| +| +| **Phylogeny_RAxML_BiPartition** : @@ -567,8 +578,8 @@ (Pg:0.02012322210145659623,(Pf:0.09977363663005259231,(Ps:0.08351583721596630983, (Ac:0.02889451913999640381,Ap:0.01674414484251282934)89:0.17730049470177636217)42:0.04320803212100913365)70:0.14429203507314311561,Pp:0.23405795780876006984); -| -| +| +| **Phylogeny_RAxML_BootStrap** : @@ -582,6 +593,25 @@ ... +--------------------------------------------------- + +Changelog +--------- + +**Version 2.0 - 06/07/2017** + + - NEW: Replace the zip between tools by Dataset Collection + - Corrected bug : output files were empty due to errors in the command section (incompatible parameters set by default instead of the ones mentioned in the help) + + +**Version 1.0 - 13/04/2017** + + - Add funtional test with planemo + + - Planemo test with conda dependencies for raxml and python + + - Scripts renamed + symlinks to the directory 'scripts' + </help> <expand macro="citations" />