Mercurial > repos > oinizan > frogs

<?xml version="1.0"?>
<!--
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-->
<tool id="FROGS_normalisation" name="FROGS Abundance normalisation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
        <description>Normalise ASV abundance. </description>

    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>

	<!--
	Command template (Cheetah). Runs normalisation.py on the BIOM and FASTA
	inputs. The sampling_method conditional selects either the sampling-by-min
	flag or an explicit num-reads value (optionally with delete-samples), and
	the three output datasets are passed as output paths.
	The opening command tag must be followed only by whitespace and the CDATA
	section: any other character placed there becomes part of the command text.
	-->
	<command detect_errors="exit_code">
		<![CDATA[
		normalisation.py
			--input-biom '$input_biom'
			--input-fasta '$input_fasta'

			#if $sampling_method.sampling_by_min == "yes"
				--sampling-by-min
			#else
				--num-reads '$sampling_method.num_reads'

				#if $sampling_method.delete_samples == "true"
				--delete-samples
				#end if
			#end if

			--output-biom '$output_biom'
			--output-fasta '$output_fasta'
			--summary-file '$summary_file'
		]]>
	</command>
	<inputs>
		<!-- Datasets to normalise: the sequences and their abundance table. -->
		<param
			argument="--input-fasta"
			type="data"
			format="fasta"
			label="Sequence file"
			help="Sequence file to normalise (format: fasta)." />
		<param
			argument="--input-biom"
			type="data"
			format="biom1"
			label="Abundance file"
			help="Abundance file to normalise (format: BIOM)." />

		<!--
		Sampling depth selection. The command template reads
		sampling_method.sampling_by_min: "yes" emits the sampling-by-min flag,
		"no" emits num-reads and, when delete_samples is "true", delete-samples.
		-->
		<conditional name="sampling_method">
			<param
				argument="--sampling-by-min"
				type="select"
				display="radio"
				label="Sampling method"
				help="Sampling by the number of sequences of the smallest sample, or select a number manually">
				<option value="yes" selected="true">Sampling by the number of sequences of the smallest sample</option>
				<option value="no">Select a number of sequences</option>
			</param>

			<!-- Automatic depth: no further parameter is needed. -->
			<when value="yes" />

			<!-- Manual depth: target read count plus the sample-removal choice. -->
			<when value="no">
				<param
					argument="--num-reads"
					type="integer"
					optional="true"
					min="1"
					value=""
					label="Number of reads"
					help="The final number of reads per sample." />
				<param
					argument="--delete-samples"
					type="select"
					display="radio"
					label="Remove samples that have an initial number of reads below the number of reads to sample ?">
					<option value="false">No, subsampling threshold need to at most equal to the smallest sample</option>
					<option value="true">Yes, subsampling threshold may be greater than the smallest sample</option>
				</param>
			</when>
		</conditional>
	</inputs>
	<outputs>
		<!--
		Result datasets. Each one is handed to normalisation.py as an output
		path by the command template (output-fasta, output-biom, summary-file).
		-->
		<data
			name="output_fasta"
			format="fasta"
			label="${tool.name}: normalised_sequences.fasta"
			from_work_dir="normalised.fasta" />
		<data
			name="output_biom"
			format="biom1"
			label="${tool.name}: normalised_abundance.biom"
			from_work_dir="normalised.biom" />
		<data
			name="summary_file"
			format="html"
			label="${tool.name}: report.html"
			from_work_dir="report.html" />
	</outputs>
    <tests>
        <!--
        Manual sampling at 100 reads per sample; delete_samples is left unset.
        The HTML report is checked twice (exact size, then diff with up to
        4 differing lines) and the FASTA output by diff with up to 12 lines.
        -->
        <test>
            <param name="input_fasta" value="references/08-affiliation_postprocessed.fasta" />
            <param name="input_biom" value="references/08-affiliation_postprocessed.biom" />
            <conditional name="sampling_method">
                <param name="sampling_by_min" value="no" />
                <param name="num_reads" value="100" />
            </conditional>
            <output name="summary_file"
                    file="references/09-normalisation.html"
                    compare="sim_size"
                    delta="0" />
            <output name="summary_file"
                    file="references/09-normalisation.html"
                    compare="diff"
                    lines_diff="4" />
            <output name="output_fasta"
                    file="references/09-normalisation.fasta"
                    compare="diff"
                    lines_diff="12" />
        </test>
    </tests>
	<help>

@HELP_LOGO@

.. class:: infomark page-header h2

What it does

This tool keeps, in each sample, the same number of elements by random sampling.

.. class:: infomark page-header h2

Inputs/outputs

.. class:: h3

Inputs

**Sequence file**:

The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).

**Abundance file**:

The abundance of each ASV in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).

**Sampling method**:

Sequence normalisation method :

 - **Sampling by the number of sequences of the smallest sample** : Automatically detects the number of sequences of the smallest sample, and selects this number for all other samples.
 - **Select a number of sequences** : Manually select a number of sequences.

With this number, a random draw of sequences is made and once normalisation is complete, each sample will contain the specified number of sequences.

.. class:: warningmark

If the **remove samples** option is enabled, samples whose total number of sequences is lower than the specified number will be removed from the abundance table.

If the option is disabled, the samples will be kept in the analysis but with a number of sequences lower than the specified number (the total number of the sample).

.. class:: h3

Outputs

**Sequence file** (normalised_sequences.fasta):

 The normalised sequence file (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).

**Abundance file** (normalised_abundance.biom):

 The normalised abundance file (format `BIOM &lt;http://biom-format.org/&gt;`_). If the **remove samples** option has been chosen, the abundance table will no longer contain information about the removed samples.

**Report file** (report.html):

 Information about discarded data (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).


.. class:: infomark page-header h2


@HELP_CONTACT@

	</help>
	<expand macro="citations" />
</tool>
author	oinizan
date	Fri, 02 May 2025 07:44:22 +0000
parents
children