diff metaphlan2.xml @ 29:713a83023626 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/metaphlan2/ commit 450bf3326f301c344103272b0d761e8625ce0c44-dirty
author bebatut
date Wed, 01 Jun 2016 10:43:23 -0400
parents 1d993cca38e3
children 99fe591b59fb
line wrap: on
line diff
--- a/metaphlan2.xml	Thu Apr 28 07:04:50 2016 -0400
+++ b/metaphlan2.xml	Wed Jun 01 10:43:23 2016 -0400
@@ -16,44 +16,48 @@
     </version_command>
 
     <command>
-<![CDATA[        
+<![CDATA[
         (which bowtie2 || exit 200)
 
         &&
 
+        #if $db.db_selector == "history"
+            mkdir ref_db
+            &&
+            bowtie2-build $db.db_sequences ref_db/ref_db
+            &&
+            python $__tool_directory__/transform_json_to_pkl.py
+                --json_input $db.db_metadata
+                --pkl_output ref_db/metadata.pkl
+            &&
+        #end if
+
         metaphlan2.py
             $input_file
             -o $output_file
-
             --input_type ${input_file.datatype.file_ext}
 
-            #if $mpa_pkl.mpa_pkl_selector == "cached"
-                #set $mpa_pkl_table = dict([(_[0], _[2]) for _ in $mpa_pkl.cached_mpa_pkl.input.options.tool_data_table.data])
-                #set $mpa_pkl_db = $mpa_pkl.cached_mpa_pkl.value
-                --mpa_pkl $mpa_pkl_table[$mpa_pkl_db]
-            #else
-                --mpa_pkl $mpa_pkl.history_mpa_pkl
-            #end if
-
             --bowtie2_exe `which bowtie2`
 
-            #if $bowtie2db.bowtie2db_selector == "cached"
-                #set $bowtie2_table = dict([(_[0], _[2]) for _ in $bowtie2db.cached_bowtie2db.input.options.tool_data_table.data])
-                #set $bowtie2db_choice = $bowtie2db.cached_bowtie2db.value
-                --bowtie2db $bowtie2_table[$bowtie2db_choice]
+            #if $db.db_selector == "cached"
+                #set $table = dict([(_[0], _[2]) for _ in $db.cached_db.input.options.tool_data_table.data])
+                #set $db_choice = $db.cached_db.value
+                --bowtie2db $table[$db_choice]
+                --mpa_pkl $table[$db_choice]".pkl"
             #else
-                --bowtie2db $bowtie2db.history_bowtie2db
+                --bowtie2db ref_db/ref_db
+                --mpa_pkl ref_db/metadata.pkl
             #end if
 
             --no_map
 
             -t $analysis_type.analysis_type_select
             #if $analysis_type.analysis_type_select == "rel_ab"
-                --tax_lev $analysis_type.taxonomic_level 
+                --tax_lev $analysis_type.taxonomic_level
             #else if $analysis_type.analysis_type_select == "marker_ab_table"
                 --nreads $analysis_type.nreads
             #else if $analysis_type.analysis_type_select == "marker_pres_table"
-                --pres_th $analysis_type.pres_th 
+                --pres_th $analysis_type.pres_th
             #end if
 
             --min_cu_len $min_cu_len
@@ -65,54 +69,32 @@
             $ignore_archaea
 
             --stat_q $stat_q
-
-            #if $sam_output
-                -s $sam_output_file
-            #end if
-
-            #if $biom_output
-                --biom $biom_output_file
-            #end if
+            -s $sam_output_file
 ]]>
     </command>
 
     <inputs>
-        <param name="input_file" type="data" format="fastq,fasta,sam,bowtie2out" label="Input file" help=""/>
+        <param name="input_file" type="data" format="fastq,fasta,sam" label="Input file" help=""/>
 
-        <conditional name="mpa_pkl">
-            <param name="mpa_pkl_selector" type="select" label="Metadata" help="">
+        <conditional name="db">
+            <param name="db_selector" type="select" label="Database with clade-specific marker genes" help="">
                 <option value="cached" selected="true">Locally cached</option>
                 <option value="history">From history</option>
             </param>
 
             <when value="cached">
-                <param name="cached_mpa_pkl" label="Cached metadata" type="select">
-                <options from_data_table="metaphlan2_metadata" />
+                <param name="cached_db" label="Cached database with clade-specific marker genes" type="select" >
+                <options from_data_table="metaphlan2_db" />
                 </param>
             </when>
             <when value="history">
-                <param name="history_mpa_pkl" type="data" format="fasta" label="Metadata from history"/>
-            </when>
-        </conditional>
-
-        <conditional name="bowtie2db">
-            <param name="bowtie2db_selector" type="select" label="BowTie2 database" help="">
-                <option value="cached" selected="true">Locally cached</option>
-                <option value="history">From history</option>
-            </param>
-
-            <when value="cached">
-                <param name="cached_bowtie2db" label="Cached BowTie2 database" type="select" >
-                <options from_data_table="metaphlan2_bowtie_db" />
-                </param>
-            </when>
-            <when value="history">
-                <param name="history_bowtie2db" type="data" format="fasta" label="BowTie2 database from history"/>
+                <param name="db_sequences" type="data" format="fasta" label="Database with clade-specific marker genes from history" help="(--bowtie2db)"/>
+                <param name="db_metadata" type="data" format="json" label="Metadata associate to the database with clade-specific marker genes from history" help="(--mpa_pkl)"/>
             </when>
         </conditional>
 
         <conditional name="analysis_type">
-            <param name="analysis_type_select" type="select" label="Type of analysis to perform">
+            <param name="analysis_type_select" type="select" label="Type of analysis to perform" help="(-t)">
               <option value="rel_ab" selected="true">Profiling a metagenomes in terms of relative abundances</option>
               <option value="reads_map">Mapping from reads to clades (only reads hitting a marker)</option>
               <option value="clade_profiles">Normalized marker counts for clades with at least a non-null marker</option>
@@ -122,7 +104,7 @@
             </param>
 
             <when value="rel_ab">
-              <param name="taxonomic_level" type="select" label="Taxonomic level for the relative abundance output">
+              <param name="taxonomic_level" type="select" label="Taxonomic level for the relative abundance output" help="(--tax_lev)">
                 <option value="a" selected="true">All taxonomic levels</option>
                 <option value="k">Kingdoms (Bacteria and Archaea) only</option>
                 <option value="p">Phyla only</option>
@@ -160,34 +142,19 @@
         <param name="ignore_archaea" type='boolean' checked="true" truevalue='' falsevalue='--ignore_archaea' label="Profile archea organisms?" help="" />
 
         <param name="stat_q" type="float" value="0.1" label="Quantile value for the robust average" help=""/>
-
-        <param name="sam_output" type='boolean' label="Output a sam file?" help="" />
-
-        <param name="biom_output" type='boolean' label="Output a biom file?" help="" />
     </inputs>
 
     <outputs>
-        <data format="tabular" name="output_file" 
-            metadata="input_sequence_file" 
-            label="${tool.name} on ${on_string}: Community profile" />
-
-        <data format="sam" name="sam_output_file" 
-            metadata="input_sequence_file" 
-            label="${tool.name} on ${on_string}: Sam file">
-            <filter>sam_output</filter>
-        </data>
-        <data format="biom" name="biom_output_file" 
-            metadata="input_sequence_file" 
-            label="${tool.name} on ${on_string}: Biom file">
-            <filter>biom_output</filter>
-        </data>
+        <data format="tabular" name="output_file" label="${tool.name} on ${on_string}: Community profile" />
+        <data format="sam" name="sam_output_file" label="${tool.name} on ${on_string}: Sam file" />
     </outputs>
 
     <tests>
         <test>
-            <param name="input_file" value="metaphlan2_input_sequences.fastq"/>
-            <param name="mpa_pkl" value="" />
-            <param name="bowtie2db" value="" />
+            <param name="input_file" value="input_sequences.fasta"/>
+            <param name="db_selector" value="history" />
+            <param name="db_metadata" value="marker_metadata.json" />
+            <param name="db_sequences" value="marker_sequences.fasta" />
             <param name="analysis_type_select" value="rel_ab" />
             <param name="taxonomic_level" value="a" />
             <param name="min_cu_len" value="2000" />
@@ -197,47 +164,50 @@
             <param name="ignore_bacteria" value="" />
             <param name="ignore_archaea" value="" />
             <param name="stat_q" value="0.1" />
-            <param name="sam_output" value='false' />
-            <param name="biom_output" value='false' />
-            <output name="output_file" file="metaphlan2_profiled_metagenome.txt"/>
+            <output name="output_file" file="community_profile.tabular"/>
         </test>
     </tests>
 
     <help><![CDATA[
-
 **What it does**
 
 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, Archaea, Eukaryotes and Viruses) from metagenomic shotgun sequencing data with species level resolution. For more information, check the `user manual <https://bitbucket.org/biobakery/metaphlan2/>`_.
 
-Metaphlan2 takes as input a sequence file in fasta, fastq, a BowTie2 produced
-SAM file or an intermediary mapping file of the metagenome generated by a 
-previous MetaPhlAn
+**Inputs**
 
-Several parameters can modulate the MetaPhlAn execution
+MetaPhlAn2 takes as input a sequence file in fasta or fastq format, or a SAM file produced by BowTie2.
 
-    * Mapping arguments
+It is also possible to use a custom database of clade-specific marker genes. In this case, two files are required: a fasta file with the marker gene sequences and a JSON file containing the associated metadata. The JSON file has the following structure:
 
-        * Test to avoid saving the output of BowTie2
-
-    * Post-mapping arguments
+::
 
-        * Taxonomic level for the relative abundance output
-        * Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances
-        * Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded
-        * Tests to avoid profiling of virus, eukaryotes, bacteria and/or archea
-        * Quantile value
+  {
+    "taxonomy": {
+            "taxonomy of genome1": genome1_length,
+            "taxonomy of genome2": genome2_length,
+            ...
+        },
+    "markers": {
+            "marker1_name": {
+                "clade": the clade that the marker belongs to,
+                "ext": [list of external genomes where the marker appears],
+                "len": length of the marker,
+                "score": score of the marker,
+                "taxon": the taxon of the marker
+            },
+            ...
+        }
+  }
 
-    * Additional analysis types and arguments
-
-        * Type of analyse to perform and some parameters for specific analysis type
+The marker names correspond to the sequence names in the fasta file containing the marker gene sequences.
 
+**Outputs**
 
-The main output file is a tab-separated output file of the predicted taxon 
-relative abundances.
+The main output file is a tab-separated output file of the predicted taxon relative abundances.
 
     ]]></help>
 
     <citations>
         <citation type="doi">10.1038/nmeth.3589</citation>
     </citations>
-</tool>
\ No newline at end of file
+</tool>