view data_manager/data_manager_fetch_mothur_reference_data.xml @ 3:2004bb845685 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit f845716f6ac93500f143a30abef97eaba406344e"
author iuc
date Fri, 25 Jun 2021 09:36:36 +0000
parents 2ffd2cdc5089
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="data_manager_fetch_mothur_reference_data" name="Fetch Mothur toolsuite reference data" version="0.2.1" tool_type="manage_data" profile="19.05">
    <description>Fetch and install reference data for Mothur</description>
    <!-- Interpreter used to run fetch_mothur_reference_data.py (see the command section). -->
    <requirements>
        <requirement type="package" version="3.8">python</requirement>
    </requirements>
    <command><![CDATA[
        ## Run the bundled fetch script; the selected data source decides which
        ## additional arguments are passed. Every interpolated value is
        ## single-quoted so the shell always sees it as one word.
        python '$__tool_directory__/fetch_mothur_reference_data.py'
        --source='${data_source.data_source_selector}'
        #if str( $data_source.data_source_selector ) == "mothur_website"
           ## Dataset identifiers chosen in the multi-select
           --datasets '${data_source.ref_data}'
        #elif str( $data_source.data_source_selector ) == "filesystem_paths"
           --description '${data_source.description}'
           --paths '${data_source.paths}'
           ## --link is only passed when the symlink checkbox is ticked
           #if $data_source.create_symlink
             --link
           #end if
        #end if
        ## Path of the data_manager_json output dataset declared in <outputs>
        '${out_file}'
    ]]></command>
    <inputs>
        <!-- The selector switches between installing predefined datasets from the
             Mothur website and importing files already present on the server. -->
        <conditional name="data_source">
            <param name="data_source_selector" type="select"
                   label="Choose the source for the reference data">
                <option value="mothur_website">Mothur website</option>
                <option value="filesystem_paths">Filesystem paths</option>
            </param>
            <when value="mothur_website">
                <!-- Option values are the dataset identifiers passed to the script
                     via the datasets argument; several can be ticked at once. -->
                <param name="ref_data" type="select" display="checkboxes" multiple="true"
                       label="Reference dataset to install">
                    <option value="lookup_titanium">GS FLX Titanium lookup files</option>
                    <option value="lookup_gsflx">GSFLX lookup files</option>
                    <option value="lookup_gs20">GS20 lookup files</option>
                    <option value="RDP_v18">RDP reference files (training set version 18)</option>
                    <option value="RDP_v16">RDP reference files (training set version 16)</option>
                    <option value="RDP_v14">RDP reference files (training set version 14)</option>
                    <option value="RDP_v10">RDP reference files (training set version 10)</option>
                    <option value="RDP_v9">RDP reference files (training set version 9)</option>
                    <option value="RDP_v7">RDP reference files (training set version 7)</option>
                    <option value="RDP_v6">RDP reference files (training set version 6)</option>
                    <option value="silva_release_138.1">SILVA reference files (release 138.1)</option>
                    <option value="silva_release_128">SILVA reference files (release 128)</option>
                    <option value="silva_release_123">SILVA reference files (release 123)</option>
                    <option value="silva_release_119">SILVA reference files (release 119)</option>
                    <option value="silva_release_102">SILVA reference files (release 102)</option>
                    <option value="greengenes_August2013">Greengenes reference taxonomy and alignment v13.8 (August 2013)</option>
                    <option value="greengenes_May2013">Greengenes reference taxonomy and alignment v13.5 (May 2013)</option>
                    <option value="greengenes_old">Greengenes reference taxonomy and alignment (pre-May 2013)</option>
                    <option value="greengenes_gold_alignment">Greengenes gold alignment</option>
                    <option value="secondary_structure_maps_silva">SILVA secondary structure maps</option>
                    <option value="secondary_structure_maps_greengenes">Greengenes secondary structure maps</option>
                </param>
            </when>
            <when value="filesystem_paths">
                <param name="description" type="text" value="" size="50"
                       label="Description of the data" optional="False" />
                <param name="paths" type="text" value="" area="True" size="10x50"
                       label="Paths to upload" optional="False"
                       help="Upload all files pasted in the box. The (recursive) contents of any pasted directories will be added as well." />
                <!-- Symlinking is pre-selected; untick to copy the files instead. -->
                <param name="create_symlink" type="boolean" truevalue="create_symlink"
                       falsevalue="copy_file"
                       label="Create symlinks to data instead of copying into Galaxy" checked="true" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Passed to the script as its final positional argument; the tests
             assert on the text this dataset contains. -->
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <tests>
        <!-- Lookup files: each test requests a single dataset and checks that the
             output contains the expected name and .pat file. The data source is
             set explicitly so the tests do not depend on the selector's option order. -->
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="lookup_titanium"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="GS FLX Titanium" />
                    <has_text text="LookUp_Titanium.pat" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="lookup_gsflx"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="GSFLX" />
                    <has_text text="LookUp_GSFLX.pat" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="lookup_gs20"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="GS20" />
                    <has_text text="LookUp_GS20.pat" />
                </assert_contents>
            </output>
        </test>
        <!-- RDP training sets: one test per release, checking that the output
             contains the expected set name and reference file names. The data
             source is set explicitly so the tests do not depend on the selector's
             option order. -->
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="RDP_v18"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="16S rRNA RDP training set 18" />
                    <has_text text="trainset18_062020.rdp.fasta" />
                    <has_text text="trainset18_062020.rdp.tax" />
                    <has_text text="trainset18_062020.pds.fasta" />
                    <has_text text="trainset18_062020.pds.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="RDP_v16"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="16S rRNA RDP training set 16" />
                    <has_text text="trainset16_022016.rdp.fasta" />
                    <has_text text="trainset16_022016.rdp.tax" />
                    <has_text text="trainset16_022016.pds.fasta" />
                    <has_text text="trainset16_022016.pds.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="RDP_v14"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="16S rRNA RDP training set 14" />
                    <has_text text="trainset14_032015.rdp.fasta" />
                    <has_text text="trainset14_032015.rdp.tax" />
                    <has_text text="trainset14_032015.pds.fasta" />
                    <has_text text="trainset14_032015.pds.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="RDP_v10"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="16S rRNA RDP training set 10" />
                    <has_text text="trainset10_082014.rdp.fasta" />
                    <has_text text="trainset10_082014.rdp.tax" />
                    <has_text text="trainset10_082014.pds.fasta" />
                    <has_text text="trainset10_082014.pds.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="RDP_v9"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="16S rRNA PDS training set 9" />
                    <has_text text="trainset9_032012.rdp.fasta" />
                    <has_text text="trainset9_032012.rdp.tax" />
                    <has_text text="trainset9_032012.pds.fasta" />
                    <has_text text="trainset9_032012.pds.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="RDP_v7"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="16S rRNA RDP training set 7" />
                    <has_text text="FungiLSU_train_1400bp_8506_mod.fasta" />
                    <has_text text="FungiLSU_train_1400bp_8506_mod.tax" />
                    <has_text text="trainset7_112011.rdp.fasta" />
                    <has_text text="trainset7_112011.rdp.tax" />
                    <has_text text="trainset7_112011.pds.fasta" />
                    <has_text text="trainset7_112011.pds.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="RDP_v6"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="RDP training set 6" />
                    <has_text text="trainset6_032010.rdp.fasta" />
                    <has_text text="trainset6_032010.rdp.tax" />
                </assert_contents>
            </output>
        </test>
        <!-- SILVA data is too large (>1GB each) for CI testing on GitHub Actions,
             so we skip these tests -->
        <!--<test>
            <param name="data_source|ref_data" value="silva_release_128"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="SILVA release 128" />
                    <has_text text="silva.nr_v128.tax" />
                    <has_text text="silva.seed_v128.tax" />
                    <has_text text="silva.nr_v128.align" />
                    <has_text text="silva.seed_v128.align" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|ref_data" value="silva_release_123"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="SILVA release 123" />
                    <has_text text="silva.nr_v123.align" />
                    <has_text text="silva.seed_v123.align" />
                    <has_text text="silva.nr_v123.tax" />
                    <has_text text="silva.seed_v123.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|ref_data" value="silva_release_119"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="SILVA release 119" />
                    <has_text text="silva.nr_v119.align" />
                    <has_text text="silva.seed_v119.align" />
                    <has_text text="silva.nr_v119.tax" />
                    <has_text text="silva.seed_v119.tax" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|ref_data" value="silva_release_102"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="SILVA release 102" />
                    <has_text text="silva.bacteria.fasta" />
                    <has_text text="silva.gold.ng.fasta" />
                    <has_text text="nogap.archaea.fasta" />
                    <has_text text="silva.archaea.fasta" />
                    <has_text text="nogap.eukarya.fasta" />
                    <has_text text="silva.eukarya.fasta" />
                    <has_text text="silva.bacteria.gg.tax" />
                    <has_text text="silva.bacteria.ncbi.tax" />
                    <has_text text="silva.bacteria.rdp.tax" />
                    <has_text text="silva.bacteria.rdp6.tax" />
                    <has_text text="silva.bacteria.silva.tax" />
                    <has_text text="silva.archaea.gg.tax" />
                    <has_text text="silva.archaea.ncbi.tax" />
                    <has_text text="silva.archaea.rdp.tax" />
                    <has_text text="silva.archaea.silva.tax" />
                    <has_text text="silva.eukarya.ncbi.tax" />
                    <has_text text="silva.eukarya.silva.tax" />
                </assert_contents>
            </output>
        </test>-->

        <!-- Greengenes is also large (400MB-1.5GB), so only the tests for the older
             (smaller) releases are executed -->
        <!--<test>
            <param name="data_source|ref_data" value="greengenes_August2013"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="Greengenes August 2013" />
                    <has_text text="gg_13_8_99.gg.tax" />
                    <has_text text="gg_13_8_99.fasta" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|ref_data" value="greengenes_May2013"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="Greengenes May 2013" />
                    <has_text text="gg_13_5_99.pds.tax" />
                    <has_text text="gg_13_5_99.gg.tax" />
                    <has_text text="gg_13_5_99.align" />
                    <has_text text="gg_13_5_99.fasta" />
                </assert_contents>
            </output>
        </test>-->
        <!-- Greengenes tests that are run: the pre-May 2013 release and the gold
             alignment. The data source is set explicitly so the tests do not
             depend on the selector's option order. -->
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="greengenes_old"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="Greengenes pre-May 2013" />
                    <has_text text="gg_99.pds.tax" />
                    <has_text text="core_set_aligned.imputed.fasta" />
                    <has_text text="gg_99.pds.ng.fasta" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="greengenes_gold_alignment"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="Greengenes gold alignment" />
                    <has_text text="rRNA16S.gold.NAST_ALIGNED.fasta" />
                </assert_contents>
            </output>
        </test>

        <!-- Secondary structure maps: check that the output contains the expected
             name and .map file. The data source is set explicitly so the tests
             do not depend on the selector's option order. -->
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="secondary_structure_maps_silva"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="SILVA" />
                    <has_text text="silva.ss.map" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="data_source|data_source_selector" value="mothur_website"/>
            <param name="data_source|ref_data" value="secondary_structure_maps_greengenes"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text="Greengenes" />
                    <has_text text="gg.ss.map" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
.. class:: infomark

**What it does**

This tool fetches reference data used by the mothur_toolsuite set of Galaxy tools,
and populates the appropriate data tables.

The reference data can be imported directly from the Mothur website, or from files
in a server directory.

Files are added to the following data tables based on file extension:

 * **mothur_lookup**: for .pat files
 * **mothur_aligndb**: for .fasta files
 * **mothur_map**: for .map files
 * **mothur_taxonomy**: for .tax files

------

**Importing from Mothur website**

Reference data sets provided by the Mothur developers can be downloaded from the
Mothur website. See the following pages to get more information about each dataset:

 * Lookup data:        http://www.mothur.org/wiki/Lookup_files
 * RDP reference data: http://www.mothur.org/wiki/RDP_reference_files
 * SILVA data:         http://www.mothur.org/wiki/Silva_reference_files
 * Greengenes data:    http://www.mothur.org/wiki/Greengenes-formatted_databases
 * Secondary structure maps: http://www.mothur.org/wiki/Secondary_structure_map

**Importing from file system paths**

If reference data is already on the server filesystem then use this option to
import it into the Mothur data tables. The appropriate data tables are determined
based on the file extensions.

Optionally a description can be added which will appear next to the base of the
reference file name in the data table entry.

------

.. class:: warningmark

**A note on Lane masks**

Lane mask data is also available via the Mothur website (files ending in ".filter"):

 * http://www.mothur.org/wiki/Lane_mask

but as these data are not currently used in the toolsuite, they cannot be imported
using this data manager.

    </help>
</tool>