Mercurial > repos > bebatut > metaphlan2
diff metaphlan2.xml @ 29:713a83023626 draft
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/metaphlan2/ commit 450bf3326f301c344103272b0d761e8625ce0c44-dirty
author | bebatut |
---|---|
date | Wed, 01 Jun 2016 10:43:23 -0400 |
parents | 1d993cca38e3 |
children | 99fe591b59fb |
line wrap: on
line diff
--- a/metaphlan2.xml Thu Apr 28 07:04:50 2016 -0400 +++ b/metaphlan2.xml Wed Jun 01 10:43:23 2016 -0400 @@ -16,44 +16,48 @@ </version_command> <command> -<![CDATA[ +<![CDATA[ (which bowtie2 || exit 200) && + #if $db.db_selector == "history" + mkdir ref_db + && + bowtie2-build $db.db_sequences ref_db/ref_db + && + python $__tool_directory__/transform_json_to_pkl.py + --json_input $db_metadata + --pkl_output ref_db/metadata.pkl + && + #end if + metaphlan2.py $input_file -o $output_file - --input_type ${input_file.datatype.file_ext} - #if $mpa_pkl.mpa_pkl_selector == "cached" - #set $mpa_pkl_table = dict([(_[0], _[2]) for _ in $mpa_pkl.cached_mpa_pkl.input.options.tool_data_table.data]) - #set $mpa_pkl_db = $mpa_pkl.cached_mpa_pkl.value - --mpa_pkl $mpa_pkl_table[$mpa_pkl_db] - #else - --mpa_pkl $mpa_pkl.history_mpa_pkl - #end if - --bowtie2_exe `which bowtie2` - #if $bowtie2db.bowtie2db_selector == "cached" - #set $bowtie2_table = dict([(_[0], _[2]) for _ in $bowtie2db.cached_bowtie2db.input.options.tool_data_table.data]) - #set $bowtie2db_choice = $bowtie2db.cached_bowtie2db.value - --bowtie2db $bowtie2_table[$bowtie2db_choice] + #if $db.db_selector == "cached" + #set $table = dict([(_[0], _[2]) for _ in $db.cached_db.input.options.tool_data_table.data]) + #set $db_choice = $db.cached_db.value + --bowtie2db $table[$db_choice] + --mpa_pkl $table[$db_choice]".pkl" #else - --bowtie2db $bowtie2db.history_bowtie2db + --bowtie2db ref_db/ref_db + --mpa_pkl ref_db/metadata.pkl #end if --no_map -t $analysis_type.analysis_type_select #if $analysis_type.analysis_type_select == "rel_ab" - --tax_lev $analysis_type.taxonomic_level + --tax_lev $analysis_type.taxonomic_level #else if $analysis_type.analysis_type_select == "marker_ab_table" --nreads $analysis_type.nreads #else if $analysis_type.analysis_type_select == "marker_pres_table" - --pres_th $analysis_type.pres_th + --pres_th $analysis_type.pres_th #end if --min_cu_len $min_cu_len @@ -65,54 +69,32 @@ $ignore_archaea --stat_q 
$stat_q - - #if $sam_output - -s $sam_output_file - #end if - - #if $biom_output - --biom $biom_output_file - #end if + -s $sam_output_file ]]> </command> <inputs> - <param name="input_file" type="data" format="fastq,fasta,sam,bowtie2out" label="Input file" help=""/> + <param name="input_file" type="data" format="fastq,fasta,sam" label="Input file" help=""/> - <conditional name="mpa_pkl"> - <param name="mpa_pkl_selector" type="select" label="Metadata" help=""> + <conditional name="db"> + <param name="db_selector" type="select" label="Database with clade-specific marker genes" help=""> <option value="cached" selected="true">Locally cached</option> <option value="history">From history</option> </param> <when value="cached"> - <param name="cached_mpa_pkl" label="Cached metadata" type="select"> - <options from_data_table="metaphlan2_metadata" /> + <param name="cached_db" label="Cached database with clade-specific marker genes" type="select" > + <options from_data_table="metaphlan2_db" /> </param> </when> <when value="history"> - <param name="history_mpa_pkl" type="data" format="fasta" label="Metadata from history"/> - </when> - </conditional> - - <conditional name="bowtie2db"> - <param name="bowtie2db_selector" type="select" label="BowTie2 database" help=""> - <option value="cached" selected="true">Locally cached</option> - <option value="history">From history</option> - </param> - - <when value="cached"> - <param name="cached_bowtie2db" label="Cached BowTie2 database" type="select" > - <options from_data_table="metaphlan2_bowtie_db" /> - </param> - </when> - <when value="history"> - <param name="history_bowtie2db" type="data" format="fasta" label="BowTie2 database from history"/> + <param name="db_sequences" type="data" format="fasta" label="Database with clade-specific marker genes from history" help="(--bowtie2db)"/> + <param name="db_metadata" type="data" format="json" label="Metadata associate to the database with clade-specific marker genes from history" 
help="(--mpa_pkl)"/> </when> </conditional> <conditional name="analysis_type"> - <param name="analysis_type_select" type="select" label="Type of analysis to perform"> + <param name="analysis_type_select" type="select" label="Type of analysis to perform" help="(-t)"> <option value="rel_ab" selected="true">Profiling a metagenomes in terms of relative abundances</option> <option value="reads_map">Mapping from reads to clades (only reads hitting a marker)</option> <option value="clade_profiles">Normalized marker counts for clades with at least a non-null marker</option> @@ -122,7 +104,7 @@ </param> <when value="rel_ab"> - <param name="taxonomic_level" type="select" label="Taxonomic level for the relative abundance output"> + <param name="taxonomic_level" type="select" label="Taxonomic level for the relative abundance output" help="(--tax_lev)"> <option value="a" selected="true">All taxonomic levels</option> <option value="k">Kingdoms (Bacteria and Archaea) only</option> <option value="p">Phyla only</option> @@ -160,34 +142,19 @@ <param name="ignore_archaea" type='boolean' checked="true" truevalue='' falsevalue='--ignore_archaea' label="Profile archea organisms?" help="" /> <param name="stat_q" type="float" value="0.1" label="Quantile value for the robust average" help=""/> - - <param name="sam_output" type='boolean' label="Output a sam file?" help="" /> - - <param name="biom_output" type='boolean' label="Output a biom file?" 
help="" /> </inputs> <outputs> - <data format="tabular" name="output_file" - metadata="input_sequence_file" - label="${tool.name} on ${on_string}: Community profile" /> - - <data format="sam" name="sam_output_file" - metadata="input_sequence_file" - label="${tool.name} on ${on_string}: Sam file"> - <filter>sam_output</filter> - </data> - <data format="biom" name="biom_output_file" - metadata="input_sequence_file" - label="${tool.name} on ${on_string}: Biom file"> - <filter>biom_output</filter> - </data> + <data format="tabular" name="output_file" label="${tool.name} on ${on_string}: Community profile" /> + <data format="sam" name="sam_output_file" label="${tool.name} on ${on_string}: Sam file" /> </outputs> <tests> <test> - <param name="input_file" value="metaphlan2_input_sequences.fastq"/> - <param name="mpa_pkl" value="" /> - <param name="bowtie2db" value="" /> + <param name="input_file" value="input_sequences.fasta"/> + <param name="db_selector" value="history" /> + <param name="db_metadata" value="marker_metadata.json" /> + <param name="db_sequences" value="marker_sequences.fasta" /> <param name="analysis_type_select" value="rel_ab" /> <param name="taxonomic_level" value="a" /> <param name="min_cu_len" value="2000" /> @@ -197,47 +164,50 @@ <param name="ignore_bacteria" value="" /> <param name="ignore_archaea" value="" /> <param name="stat_q" value="0.1" /> - <param name="sam_output" value='false' /> - <param name="biom_output" value='false' /> - <output name="output_file" file="metaphlan2_profiled_metagenome.txt"/> + <output name="output_file" file="community_profile.tabular"/> </test> </tests> <help><![CDATA[ - **What it does** MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, Archaea, Eukaryotes and Viruses) from metagenomic shotgun sequencing data with species level resolution. For more information, check the `user manual <https://bitbucket.org/biobakery/metaphlan2/>`_. 
-Metaphlan2 takes as input a sequence file in fasta, fastq, a BowTie2 produced -SAM file or an intermediary mapping file of the metagenome generated by a -previous MetaPhlAn +**Inputs** -Several parameters can modulate the MetaPhlAn execution +Metaphlan2 takes as input a sequence file in fasta or fastq format, or a BowTie2-produced SAM file. - * Mapping arguments +It is also possible to use a custom database with clade-specific marker genes. In this case, a fasta file with marker gene sequences is required and also a file containing metadata. This file is a json file with: - * Test to avoid saving the output of BowTie2 - - * Post-mapping arguments +:: - * Taxonomic level for the relative abundance output - * Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances - * Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded - * Tests to avoid profiling of virus, eukaryotes, bacteria and/or archea - * Quantile value + { + "taxonomy": { + "taxonomy of genome1": genome1_length, + "taxonomy of genome2": genome2_length, + ... + }, + "markers": { + "marker1_name": { + "clade": the clade that the marker belongs to, + "ext": [list of external genomes where the marker appears], + "len": length of the marker, + "score": score of the marker, + "taxon": the taxon of the marker + }, + ... + } + } - * Additional analysis types and arguments - - * Type of analyse to perform and some parameters for specific analysis type +The marker names correspond to the sequence names in the corresponding fasta file with marker gene sequences. +**Outputs** -The main output file is a tab-separated output file of the predicted taxon -relative abundances. +The main output file is a tab-separated output file of the predicted taxon relative abundances. ]]></help> <citations> <citation type="doi">10.1038/nmeth.3589</citation> </citations> -</tool> \ No newline at end of file +</tool>