changeset 16:d73e09b3bc26 draft

planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/ commit 58dbb2c93ec7f854929ed4f5a294d6eeaaed467a-dirty
author oinizan
date Thu, 09 Mar 2023 07:59:57 +0000
parents 92265600b668
children 5ad1c5f24917
files affiliation_OTU.xml affiliation_stats.xml affiliations_stat.xml cluster_filters.xml cluster_stats.xml clustering.xml clusters_stat.xml demultiplex.xml macros.xml macros.xml_new otu_filters.xml preprocess.xml remove_chimera.xml static/images/FROGS_affiliation_stat_alignment.png static/images/FROGS_affiliation_stat_bootstrap.png static/images/FROGS_affiliation_stat_rarefaction.png static/images/FROGS_affiliation_stat_sunburst.png static/images/FROGS_affiliation_stat_taxonomies.png static/images/FROGS_affiliation_stats_alignment.png static/images/FROGS_affiliation_stats_bootstrap.png static/images/FROGS_affiliation_stats_rarefaction.png static/images/FROGS_affiliation_stats_sunburst.png static/images/FROGS_affiliation_stats_taxonomies.png static/images/FROGS_cluster_stat_clusterDistrib1.png static/images/FROGS_cluster_stat_sample_dist1.png static/images/FROGS_cluster_stat_sample_dist2.png static/images/FROGS_cluster_stat_seq_dist.png static/images/FROGS_cluster_stats_clusterDistrib1.png static/images/FROGS_cluster_stats_sample_dist1.png static/images/FROGS_cluster_stats_sample_dist2.png static/images/FROGS_cluster_stats_seq_dist.png taxonomic_affiliation.xml
diffstat 32 files changed, 824 insertions(+), 708 deletions(-) [+]
line wrap: on
line diff
--- a/affiliation_OTU.xml	Tue Mar 07 07:28:20 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,166 +0,0 @@
-<tool id="FROGS_affiliation_OTU" name="FROGS Affiliation OTU" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-    <description>Taxonomic affiliation of each OTU's seed by RDPtools and BLAST</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements">
-        <requirement type="package" version="2.10">blast</requirement>
-        <requirement type="package" version="6.6.0">emboss</requirement>
-    </expand>
-    <command detect_errors="exit_code"><![CDATA[
-    #set $reference_filename = str( $ref_file.fields.path )
-    export GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-2048}/1024)) &&
-    affiliation_OTU.py
-        --reference '${reference_filename}'
-        --taxonomy-ranks $taxonomic_ranks
-        --input-biom '$biom_abundance'
-        --input-fasta '$fasta_sequences'
-        --output-biom '$biom_affiliation'
-        --summary '$summary'
-        @CPUS@
-        --java-mem \$GALAXY_MEMORY_GB
-        #if $rdp
-          --rdp
-        #end if
-    ]]></command>
-    <inputs>
-        <!-- Database Choice -->
-        <param name="ref_file" type="select" label="Using reference database" help="Select reference from the list">
-            <options from_data_table="frogs_db"/>
-            <validator type="no_options" message="A built-in database is not available"/>
-        </param>
-        <param argument="--rdp" type="boolean" label="Also perform RDP assignation?" help="Taxonomy affiliation will be perform thanks to Blast. This option allows to perform it also with RDP classifier tool (default No)"/>
-        <expand macro="taxonomic_ranks"/>
-        <!-- Files -->
-        <param format="fasta" name="fasta_sequences" type="data" label="Sequence file" help="The sequences to affiliated (format: FASTA)"/>
-        <param format="biom1" name="biom_abundance" type="data" label="Abundance file" help="The abundance file (format: BIOM)"/>
-    </inputs>
-    <outputs>
-        <data format="biom1" name="biom_affiliation" label="${tool.name}: affiliation_abundance.biom" from_work_dir="affiliation.biom"/>
-        <data format="html" name="summary" label="${tool.name}: report.html" from_work_dir="report.html"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="ref_file" value="ITS1_test"/>
-            <param name="fasta_sequences" value="references/04-filters.fasta"/>
-            <param name="biom_abundance" value="references/04-filters.biom"/>
-            <param name="rdp" value="true"/>
-            <output name="biom_affiliation" file="references/06-affiliation.biom" compare="sim_size" delta="5"/>
-            <output name="summary" file="references/06-affiliation.html" compare="diff" lines_diff="0"/>
-        </test>
-    </tests>
-    <help>
-
-@HELP_LOGO@
-
-.. class:: infomark page-header h2
-
-What it does
-
-this tool adds taxonomic affiliation in abundance file.
-
-
-.. class:: infomark page-header h2
-
-Inputs/outputs
-
-.. class:: h3
-
-Inputs
-
-**Sequence file**:
-
-The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
-
-**Abundance file**:
-
-The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
-
-.. class:: h3
-
-Outputs
-
-**Abundance file** (tax_affiliation.biom):
-
- The abundance file with affiliation (format `BIOM &lt;http://biom-format.org/&gt;`_).
-
-**Report file** (report.html):
-
- This file presents the number of sequences affiliated by blast, and the number of multi-affiliation (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
-
- .. image:: FROGS_affiliation_summary.png
-   :height: 975
-   :width: 867
-
-.. class:: infomark page-header h2
-
-Reference database
-
-All the databases we format (on demand) for RDPClassifier and NCBI Blast+ are inventoried here: http://genoweb.toulouse.inrae.fr/frogs_databanks/assignation/readme.txt
-
-.. class:: infomark page-header h2
-
-How it works ?
-
-.. csv-table::
-   :header: "Steps", "Description"
-   :widths: 5, 150
-   :class: table table-striped
-
-   "1", "`RDPClassifier &lt;http://rdp.cme.msu.edu/classifier/classifier.jsp&gt;`_ may be used with database to associate to each OTU a taxonomy and a bootstrap (example: *Bacteria;(1.0);Firmicutes;(1.0);Clostridia;(1.0);Clostridiales;(1.0);Clostridiaceae 1;(1.0);Clostridium sensu stricto;(1.0);*)."
-   "2", "`blastn+ &lt;https://blast.ncbi.nlm.nih.gov/Blast.cgi&gt;`_ or `needlall &lt;http://emboss.sourceforge.net/apps/release/6.6/emboss/apps/needleall.html&gt;`_ is used to find alignment between each OTU and the database. Only the bests hits with the same score are reported. blastn+ is used for merged read pair, and needall is used for artificially combined sequence. For each alignment returned, several metrics are computed: identity percentage, coverage percentage, and alignment length"
-   "3", "For each OTU with several blastn+/needlall alignment results a consensus is determined on each taxonomic level. If all the taxa at a taxonomic rank are identical the taxon name is reported otherwise *Multi-affiliation* is reported. For example, if you have an OTU with two equivalent hits, associated to *Bacteria;Proteobacteria;Gamma Proteobacteria;Enterobacteriales*, and *Bacteria;Proteobacteria;Beta Proteobacteria;Methylophilales*, the consensus will be *Bacteria;Proteobacteria;Multi-affiliation;Multi-affiliation*."
-
-.. class:: infomark page-header h2
-
-Alignment metrics details on identity percentage calculation
-
-**- Problem with classical %id computation method**
-
-* **Case 1: a sequencing of overlapping sequences i.e. 16S V3-V4 amplicon MiSeq sequencing**
-
-.. image:: FROGS_affiliation_overlapped_percent_id.png
-    :height: 325
-    :width: 807
-
-* **Case 2 : a sequencing of non-overlapping sequences: case of ITS1 amplicon MiSeq sequencing**
-
-.. image:: FROGS_affiliation_combined_percent_id.png
-    :height: 310
-    :width: 887
-
-**- Finally, how percentage identity is computed ?**
-
-With the classical method of %id calculation, filtering on %id will systematically removed “FROGS combined” OTUs. So, we proposed to replace the classical %id by a %id computed on the sequenced bases only.
-
-.. image:: FROGS_affiliation_percent_id_formula.png
-    :height: 36
-    :width: 637
-
-For the precedent use cases we will obtain:
-
-* Case 1: 16S V3V4 overlapped sequence
-
-  % sequenced bases identity = 400 matches / 400 bp = 100%
-
-* Case 2: very large ITS1 “FROGS combined” shorter than the real sequence
-
-  % sequenced bases identity = (250 + 250 ) / (600 - 100) = 100%
-
-This calculation allows to return 100% of identity on sequenced bases for “FROGS combined” shorter or longer than reality in case of perfect sequencing, and a smaller percentage of identity in the case of small overlap repeat kept in FROGS combined sequence.
-
-.. class:: infomark page-header h2
-
-
-Advices
-
-This tool can take large time. It is recommended to filter your OTU abundance and sequence files before this tool (see **FROGS OTU Filters**).
-
-As you can see the affiliation of each OTU is not human readable in outputed abundance file. We provide a tools to convert these BIOM file in tabulated file, see the **FROGS BIOM to TSV** tool.
-
-
-@HELP_CONTACT@
-
-    </help>
-    <expand macro="citations"/>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/affiliation_stats.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -0,0 +1,153 @@
+<tool id="FROGS_affiliation_stats" name="FROGS Affiliation stats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+	<description>Process some metrics on taxonomies</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+		affiliation_stats.py
+		    --input-biom '$biom'
+		    --output-file '$summary_file'
+		    --taxonomic-ranks $taxonomic_ranks
+		    --rarefaction-ranks $rarefaction_ranks
+		    #if $affiliation.affiliation_type == "FROGS_blast"
+		    	--multiple-tag 'blast_affiliations'
+		        --tax-consensus-tag 'blast_taxonomy'
+		        --identity-tag 'perc_identity'
+		        --coverage-tag 'perc_query_coverage'
+			#else if $affiliation.affiliation_type == "FROGS_rdp"
+				--taxonomy-tag 'rdp_taxonomy'
+				--bootstrap-tag 'rdp_bootstrap'
+			#else
+				--taxonomy-tag '$affiliation.taxonomy_tag'
+				#if $affiliation.bootstrap_tag
+					--bootstrap-tag '$affiliation.bootstrap_tag'
+				#end if
+				#if $affiliation.identity_tag and $affiliation.coverage_tag
+					--identity-tag '$affiliation.identity_tag'
+					--coverage-tag '$affiliation.coverage_tag'
+				#end if
+			#end if
+	</command>
+	<inputs>
+		<!-- Files -->
+		<param format="biom1" name="biom" type="data" label="Abundance file" help="Abundances and affiliations (format: BIOM)"/>
+		<!-- Parameters -->
+		<expand macro="taxonomic_ranks"/>
+		<param argument="--rarefaction-ranks" type="text"  optional="false" value="Class Order Family Genus Species" 
+			label="Rarefaction ranks" help="The ranks that will be evaluated in rarefaction. Each rank is separated by one space.">
+			<sanitizer invalid_char="">
+                <valid initial="string.letters">
+                    <add value=" " />
+                </valid>
+            </sanitizer>
+            <validator type="regex">[A-Za-z ]+</validator>
+		</param>
+		<conditional name="affiliation">
+			<param name="affiliation_type" type="select" label="Affiliation processed" help="Select the type of affiliation processed. If your affiliation has been processed with an external tool: use 'Custom'.">
+				<option value="FROGS_blast" selected="true">FROGS Blast</option>
+				<option value="FROGS_rdp">FROGS RDP</option>
+				<option value="custom">Custom</option>
+			</param>
+			<when value="FROGS_blast"/>
+			<when value="FROGS_rdp"/>
+			<when value="custom">
+				<param argument="--taxonomy-tag" type="text" value="taxonomy" label="Taxonomy tag" help="The metadata title in BIOM for the taxonomy">
+					<expand macro="sanitizer_validator"/>
+				</param>
+				<param argument="--bootstrap-tag" type="text" label="Bootstrap tag" help="The metadata title in BIOM for the taxonomy bootstrap">
+					<expand macro="sanitizer_validator"/>
+				</param>
+				<param argument="--identity-tag" type="text" label="Identity tag" help="The metadata tag used in BIOM file to store the alignment identity">
+					<expand macro="sanitizer_validator"/>
+				</param>
+				<param argument="--coverage-tag" type="text" label="Coverage tag" help="The metadata tag used in BIOM file to store the alignment OTUs coverage">
+					<expand macro="sanitizer_validator"/>
+				</param>
+			</when>
+		</conditional>
+	</inputs>
+	<outputs>
+		<data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="summary.html"/>
+	</outputs>
+	<tests>
+		<test>
+			<param name="biom" value="references/09-normalisation.biom" />
+			<param name="rarefaction_ranks" value="Family Genus Species" />
+			<conditional name="affiliation">
+				<param name="affiliation_type" value="FROGS_blast" />
+				<param name="taxonomic_ranks" value="Domain Phylum Class Order Family Genus Species" />
+				<param name="taxonomy_tag" value="blast_taxonomy" />
+				<param name="identity_tag" value="perc_identity" />
+				<param name="coverage_tag" value="perc_query_coverage" />
+			</conditional>
+			<!-- differences may exist because the rarefaction curves are generated with a random function -->
+			<output name="summary_file" file="references/11-affiliationsStat.html" compare="sim_size" delta="20" />
+			<output name="summary_file" file="references/11-affiliationsStat.html" compare="diff" lines_diff="2" />
+		</test>
+	</tests>
+	<help>
+
+@HELP_LOGO@
+
+.. class:: infomark page-header h2
+
+What it does
+
+FROGS Affiliation stats computes several metrics and generates an HTML file describing OTUs based on their taxonomies and, when available, the quality of the affiliations.
+
+
+.. class:: infomark page-header h2
+
+Input/output
+
+.. class:: h3
+
+Input
+
+**Abundance file**:
+
+The abundance and affiliation of each OTU (format `BIOM &lt;http://biom-format.org/&gt;`_). This file can be produced by FROGS Affiliation OTU.
+
+The FROGS tools working on clusters, and other metagenomic workflows, produce files in BIOM format.
+
+.. class:: h3
+
+Output
+
+**Report file** (report.html):
+
+ OTUs taxonomies and affiliations metrics (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_):
+
+  *-Taxonomy distribution*: displays the distribution of each taxon and the rarefaction for each taxonomic rank and for each sample
+
+  .. image:: FROGS_affiliation_stats_taxonomies.png
+    :height: 380
+    :width: 891
+
+  .. image:: FROGS_affiliation_stats_rarefaction.png
+    :height: 380
+    :width: 794
+
+  .. image:: FROGS_affiliation_stats_sunburst.png
+    :height: 380
+    :width: 440
+
+  -Bootstrap distribution: displays for affiliation methods with bootstrap the bootstrap on each taxonomic rank
+
+  .. image:: FROGS_affiliation_stats_bootstrap.png
+    :height: 380
+    :width: 867
+
+  -Alignment distribution: displays for affiliation methods with alignment the distribution of identity/coverage
+
+  .. image:: FROGS_affiliation_stats_alignment.png
+    :height: 380
+    :width: 859
+
+
+@HELP_CONTACT@
+
+	</help>
+	<expand macro="citations" />
+</tool>
--- a/affiliations_stat.xml	Tue Mar 07 07:28:20 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,153 +0,0 @@
-<tool id="FROGS_affiliations_stat" name="FROGS Affiliations stat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-	<description>Process some metrics on taxonomies</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-    <command detect_errors="exit_code">
-		affiliations_stat.py
-		    --input-biom '$biom'
-		    --output-file '$summary_file'
-		    --taxonomic-ranks $taxonomic_ranks
-		    --rarefaction-ranks $rarefaction_ranks
-		    #if $affiliation.affiliation_type == "FROGS_blast"
-		    	--multiple-tag 'blast_affiliations'
-		        --tax-consensus-tag 'blast_taxonomy'
-		        --identity-tag 'perc_identity'
-		        --coverage-tag 'perc_query_coverage'
-			#else if $affiliation.affiliation_type == "FROGS_rdp"
-				--taxonomy-tag 'rdp_taxonomy'
-				--bootstrap-tag 'rdp_bootstrap'
-			#else
-				--taxonomy-tag '$affiliation.taxonomy_tag'
-				#if $affiliation.bootstrap_tag
-					--bootstrap-tag '$affiliation.bootstrap_tag'
-				#end if
-				#if $affiliation.identity_tag and $affiliation.coverage_tag
-					--identity-tag '$affiliation.identity_tag'
-					--coverage-tag '$affiliation.coverage_tag'
-				#end if
-			#end if
-	</command>
-	<inputs>
-		<!-- Files -->
-		<param format="biom1" name="biom" type="data" label="Abundance file" help="Abundances and affiliations (format: BIOM)"/>
-		<!-- Parameters -->
-		<expand macro="taxonomic_ranks"/>
-		<param argument="--rarefaction-ranks" type="text"  optional="false" value="Class Order Family Genus Species" 
-			label="Rarefaction ranks" help="The ranks that will be evaluated in rarefaction. Each rank is separated by one space.">
-			<sanitizer invalid_char="">
-                <valid initial="string.letters">
-                    <add value=" " />
-                </valid>
-            </sanitizer>
-            <validator type="regex">[A-Za-z ]+</validator>
-		</param>
-		<conditional name="affiliation">
-			<param name="affiliation_type" type="select" label="Affiliation processed" help="Select the type of affiliation processed. If your affiliation has been processed with an external tool: use 'Custom'.">
-				<option value="FROGS_blast" selected="true">FROGS Blast</option>
-				<option value="FROGS_rdp">FROGS RDP</option>
-				<option value="custom">Custom</option>
-			</param>
-			<when value="FROGS_blast"/>
-			<when value="FROGS_rdp"/>
-			<when value="custom">
-				<param argument="--taxonomy-tag" type="text" value="taxonomy" label="Taxonomy tag" help="The metadata title in BIOM for the taxonomy">
-					<expand macro="sanitizer_validator"/>
-				</param>
-				<param argument="--bootstrap-tag" type="text" label="Bootstrap tag" help="The metadata title in BIOM for the taxonomy bootstrap">
-					<expand macro="sanitizer_validator"/>
-				</param>
-				<param argument="--identity-tag" type="text" label="Identity tag" help="The metadata tag used in BIOM file to store the alignment identity">
-					<expand macro="sanitizer_validator"/>
-				</param>
-				<param argument="--coverage-tag" type="text" label="Coverage tag" help="The metadata tag used in BIOM file to store the alignment OTUs coverage">
-					<expand macro="sanitizer_validator"/>
-				</param>
-			</when>
-		</conditional>
-	</inputs>
-	<outputs>
-		<data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="summary.html"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="biom" value="references/09-normalisation.biom" />
-			<param name="rarefaction_ranks" value="Family Genus Species" />
-			<conditional name="affiliation">
-				<param name="affiliation_type" value="FROGS_blast" />
-				<param name="taxonomic_ranks" value="Domain Phylum Class Order Family Genus Species" />
-				<param name="taxonomy_tag" value="blast_taxonomy" />
-				<param name="identity_tag" value="perc_identity" />
-				<param name="coverage_tag" value="perc_query_coverage" />
-			</conditional>
-			<!-- differences may exist due to random function to generat rarefaction curves -->
-			<output name="summary_file" file="references/11-affiliationsStat.html" compare="sim_size" delta="20" />
-			<output name="summary_file" file="references/11-affiliationsStat.html" compare="diff" lines_diff="2" />
-		</test>
-	</tests>
-	<help>
-
-@HELP_LOGO@
-
-.. class:: infomark page-header h2
-
-What it does
-
-FROGS Affiliations stat computes several metrics and generates a HTML file describing OTUs based on their taxonomies and eventually the quality of the affiliations.
-
-
-.. class:: infomark page-header h2
-
-Input/output
-
-.. class:: h3
-
-Input
-
-**Abundance file**:
-
-The abundance and affiliation of each OTUs (format `BIOM &lt;http://biom-format.org/&gt;`_). This file can be produced by FROGS Affiliation OTU.
-
-The FROGS's tools working on clusters and others metagenomic workflows produce files in BIOM format.
-
-.. class:: h3
-
-Output
-
-**Report file** (report.html):
-
- OTUs taxonomies and affiliations metrics (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_):
-
-  *-Taxonomy distribution*: displays the distribution of each taxon and the rarefaction for each taxonomic rank and for each sample
-
-  .. image:: FROGS_affiliation_stat_taxonomies.png
-    :height: 380
-    :width: 891
-
-  .. image:: FROGS_affiliation_stat_rarefaction.png
-    :height: 380
-    :width: 794
-
-  .. image:: FROGS_affiliation_stat_sunburst.png
-    :height: 380
-    :width: 440
-
-  -Bootstrap distribution: displays for affiliation methods with bootstrap the bootstrap on each taxonomic rank
-
-  .. image:: FROGS_affiliation_stat_bootstrap.png
-    :height: 380
-    :width: 867
-
-  -Alignment distribution: displays for affiliation methods with alignment the distribution of identity/coverage
-
-  .. image:: FROGS_affiliation_stat_alignment.png
-    :height: 380
-    :width: 859
-
-
-@HELP_CONTACT@
-
-	</help>
-	<expand macro="citations" />
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_filters.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -0,0 +1,297 @@
+<?xml version="1.0"?>
+<!--
+# Copyright (C) 2015 INRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-->
+<tool id="FROGS_cluster_filters" name="FROGS_4 Cluster filters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+	<description>Filters clusters on several criteria.</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements">
+        <requirement type="package" version="2.10">blast</requirement>
+    </expand>
+
+        <stdio>
+                <exit_code range="1:" />
+                <exit_code range=":-1" />
+        </stdio>
+
+	<command>
+
+			cluster_filters.py
+			--nb-cpus \${GALAXY_SLOTS:-1}
+			--input-biom '$input_biom'
+			--input-fasta '$input_fasta'
+			--output-fasta '$output_fasta'
+			--output-biom '$output_biom'
+			--excluded '$output_excluded'
+			--summary '$output_summary'
+
+			#if $choice_prevalence_method.prevalence_method == "all"
+				#if $choice_prevalence_method.min_sample_presence
+					--min-sample-presence $choice_prevalence_method.min_sample_presence
+				#end if
+			#else if $choice_prevalence_method.prevalence_method == "replicate"
+				#if $choice_prevalence_method.min_replicate_presence
+					--min-replicate-presence $choice_prevalence_method.min_replicate_presence
+					--replicate_file $choice_prevalence_method.replicate_file
+				#end if
+			#end if
+
+			#if $choice_abundance_unit.abundance_unit_type == "count"
+				#if $choice_abundance_unit.min_abundance_count
+					--min-abundance $choice_abundance_unit.min_abundance_count
+				#end if
+			#end if
+
+			#if $choice_abundance_unit.abundance_unit_type == "proportion"
+				#if $choice_abundance_unit.min_abundance_proportion
+					--min-abundance $choice_abundance_unit.min_abundance_proportion
+				#end if
+			#end if
+
+			#if $nb_biggest_clusters
+				--nb-biggest-clusters $nb_biggest_clusters
+			#end if
+
+
+			#if $contaminantSource.which_contaminantSource == "history"
+				##build index on the fly
+				--contaminant '${contaminantSource.ownContaminantFile}'
+			#else if $contaminantSource.which_contaminantSource == "server"
+				##use precomputed indexes
+				--contaminant '${contaminantSource.contaminants_db.fields.path}'
+			#end if
+
+	</command>
+	<inputs>
+		<!-- Files -->
+		<param format="fasta" name="input_fasta" type="data" label="Sequence file" help="The sequence file to filter (format: FASTA)" />
+		<param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)" />
+
+		<conditional name="choice_prevalence_method">
+			<param name="prevalence_method" type="select" label="Minimum prevalence method">
+				<option value="all"> all samples </option>
+				<option value="replicate"> replicate identification </option>
+			</param>
+			<when value="all">
+				<param name="min_sample_presence" type="integer" optional="true" label="Minimum prevalence" size="5" help="Fill the field only if you want this treatment. Keep cluster if it is present in at least this number of samples.">
+					<validator type="in_range" min="2" message="To be effective this threshold need to be higher than 1" />
+				</param>
+			</when>
+			<when value="replicate">
+				<param name="replicate_file" type="data" format="tsv" optional="True" label="File of replicated sample names" help="Replicate file to link each sample to its group (cf. Help section)." />
+				<param name="min_replicate_presence" type="float" min="0" max="1" optional="true" label="Minimum prevalence" size="5" help="Fill the field only if you want this treatment. Keep cluster present in at least this proportion of replicates in at least one group (must be a proportion between 0 and 1).">
+				</param>
+			</when>
+		</conditional>
+
+		<conditional name="choice_abundance_unit">
+			<param name="abundance_unit_type" type="select" label="Minimum cluster abundancy as proportion or count. We recommend to use a proportion of 0.00005.">
+				<option value="proportion">as proportion</option>
+				<option value="count">as count</option>
+			</param>
+			<when value="proportion">
+				<param name="min_abundance_proportion" argument="--min_abundance" type="float" optional="true" label="Minimum proportion of sequences abundancy to keep cluster" help='Fill the field only if you want this treatment. Example: 0.00005, recommended by Bokulich et al 2013, to keep cluster with at least 0.005% of all sequences'>
+					<validator type="in_range" min="0" exclude_min="true" max="1" exclude_max="true" message="Abundance proportion threshold need to be strictly greater than 0 and less than 1, otherwise you will not remove anything or remove everything." />
+				</param>
+			</when>
+			<when value="count">
+				<param name="min_abundance_count" argument="--min_abundance" type="integer" optional="true" label="Minimum number of sequences to keep cluster" help='Fill the field only if you want this treatment. Ex: 2 to keep cluster with at least 2 sequences, so remove single singleton'>
+					<validator type="in_range" min="2" message='To be effective this threshold need to be higher than 1. 2 means that you will remove cluster with 1 and only 1 sequence over all samples'/>
+				</param>
+			</when>
+		</conditional>
+
+		<param argument="--nb-biggest-clusters" type="integer" optional="true" label="N biggest clusters" help="Fill the fields only if you want this treatment. Keep the N biggest clusters" />
+
+		<conditional name="contaminantSource">
+			<param name="which_contaminantSource" argument="--contaminant" type="select" label="Search for contaminant clusters." help="Either you use your own contaminant fasta file or you select one among available ones.">
+				<option value="no">No contaminant filter</option>
+				<option value="server">Use contaminant FASTA file from the server</option>
+				<option value="history">Use contaminant FASTA file from the history</option>
+			</param>
+			<when value="no"/>
+			<when value="server">
+				<param name="contaminants_db" type="select" label="Contaminant databank" help="For example the phiX databank (the phiX is a control added in Illumina sequencing technologies).">
+					<options from_data_table="frogs_contaminant_db"></options>
+					<validator type="no_options" message="A built-in database is not available" />
+				</param>
+			</when>
+			<when value="history">
+				<param name="ownContaminantFile" type="data" format="fasta" label="Select a contaminante reference from history" />
+			</when>
+		</conditional>
+	</inputs>
+	<outputs>
+		<data format="biom1" name="output_biom" label="${tool.name}: clusterFilters_abundance.biom" from_work_dir="clusterFilters_abundance.biom" />
+		<data format="fasta" name="output_fasta" label="${tool.name}: clusterFilters_sequences.fasta" from_work_dir="clusterFilters_sequences.fasta" />
+		<data format="tsv" name="output_excluded" label="${tool.name}: excluded.tsv" from_work_dir="excluded.tsv" />
+		<data format="html" name="output_summary" label="${tool.name}: report.html" from_work_dir="report.html" />
+	</outputs>
+	<tests>
+		<test>
+	        <param name="input_fasta" value="references/03-chimera.fasta" />
+            <param name="input_biom" value="references/03-chimera.biom" />
+            <param name="min_sample_presence" value="3" />
+		    <conditional name="choice_abundance_unit">
+		    	<param name="abundance_unit_type" value="proportion" />
+				<param name="min_abundance_proportion" value="0.00005" />
+		    </conditional>
+            <conditional name="contaminantSource">
+            	<param name="which_contaminantSource" value="server"/>
+				<param name="contaminants_db" value="phiX_test" />
+			</conditional>
+			<conditional name="choice_prevalence_method">
+				<param name="prevalence_method" value="replicate" />
+				<param name="replicate_file" value="input/replicate_file.tsv" />
+				<param name="min_replicate_presence" value="0.5"/>
+			</conditional>
+	 	    <output name="output_fasta" file="references/04-filters.fasta" compare="diff" lines_diff="0" />
+	 	    <output name="output_biom" file="references/04-filters.biom" compare="sim_size" delta="0" />
+		    <output name="output_excluded" file="references/04-filters.excluded" compare="diff" lines_diff="0" />
+		    <output name="output_summary" file="references/04-filters.html" compare="diff" lines_diff="0" />
+	    </test>
+	</tests>
+	<help>
+
+@HELP_LOGO@
+
+.. class:: infomark page-header h2
+
+What it does
+
+Filter the clusters in an abundance table according to:
+
+-The abundance and the occurrence of clusters: presence in samples, cluster size and maximum number of clusters.
+
+-Contamination: from the list of proposed databanks (e.g. phiX, a control added in Illumina sequencing technologies) or from your history (e.g. a fasta file containing a list of contaminants of your choice).
+
+
+
+.. class:: infomark page-header h2
+
+Inputs/outputs
+
+
+.. class:: h3
+
+Inputs
+
+**Sequence file**:
+
+The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+
+**Abundance file**:
+
+The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+
+**Contaminant fasta file** (optional):
+
+A fasta sequence file containing the reference sequences of known contaminants.
+
+**File of replicated sample names** (optional):
+
+If you selected " replicate identification " for the minimum prevalence method, this file is needed to identify the group to which the samples belong.
+
+If not all samples are present in the "File of replicated sample names", they will not be affected by this filter step and will be kept in the abundance table.
+
+The file must consist of only 2 columns, separated by a tabulation. The first column contains the exact names of the samples (exactly those contained in the biom file) and the second column contains the name of the group to which they belong.
+
+Please note that group names must not contain accents, spaces or special characters.
+
+.. class:: h3
+
+Outputs
+
+**Sequence file** (sequences.fasta):
+
+ The sequences after filtering (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+
+**Abundance file** (abundance.biom):
+
+ The abundance after filtering (format `BIOM &lt;http://biom-format.org/&gt;`_).
+
+**Excluded file** (excluded.tsv):
+
+ The list of the OTUs deleted by filters (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+
+**Report file** (report.html):
+
+ The filters and the number of removed sequences (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
+
+
+
+.. class:: infomark page-header h2
+
+How it works?
+
+
+
+
+The OTUs kept are the ones that, in the input BIOM file, satisfy the thresholds specified by the user.
+
+The BIOM abundance table and the fasta file are written again according to the OTUs kept.
+
+The OTUs discarded are listed in the excluded file.
+
+.. csv-table::
+   :header: "Steps", "description"
+   :widths: 5, 150
+   :class: table table-striped
+
+   "1", "Except for the filter that selects the N most abundant OTUs, all the selected filters are run independently. For each filter, a list of the OTUs to remove is generated. For the contaminant search, an OTU is added to this list if it aligns to a contaminant reference sequence with 80% identity and 80% coverage."
+   "2", "All the OTUs tagged to remove by at least one filter are removed."
+   "3", "If the filter to select the N most abundant OTUs is filled it is applied."
+
+
+
+.. class:: infomark page-header h2
+
+Advices
+
+Please check that the input fasta file and the input BIOM file correspond to the same OTUs.
+
+Examples for the filters on abundance and occurrence of the OTUs:
+
+-To keep the OTUs that are present in at least 5 samples, select the **all samples** option of the **Minimum prevalence method** field and set the minimum prevalence to "5".
+
+-To keep the OTUs that are present in at least half of the replicates of at least one group, select the **replicate identification** option of the **Minimum prevalence method** field and set the minimum prevalence to "0.5".
+
+.. image:: FROGS_otu_filter_replicates_file.png
+
+In this example, if we want to keep the OTUs/ASVs that are present in at least 50% of the replicates of at least one group, we set the threshold at 0.5. The process will therefore keep the OTUs/ASVs present in at least:
+
+ - 2 "rich" samples
+ - 3 "richAB" samples,
+ - 1 "lowAB" sample
+ - 1 "april21" sample
+
+and all OTUs/ASVs in sample9 since it is the only representative of the "low" condition.
+
+-To keep only the 20 biggest OTUs, fill the **N biggest clusters** field with "20".
+
+-To filter on abundance, we advise you to specify 0.005% i.e. 0.00005. It seems to be the optimal threshold (`Bokulich *et al*, 2013 &lt;http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html&gt;`_ ).
+
+
+@HELP_CONTACT@
+
+	</help>
+	<expand macro="citations" />
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cluster_stats.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -0,0 +1,87 @@
+<tool id="FROGS_cluster_stats" name="FROGS Cluster stats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+	<description>Process some metrics on clusters</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+	cluster_stats.py
+		--input-biom '$biom'
+		--output-file '$summary_file'
+	</command>
+	<inputs>
+		<param format="biom1" name="biom" type="data" label="Abundance file" help="Clusters abundance (format: BIOM)"/>
+	</inputs>
+	<outputs>
+		<data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="summary.html"/>
+	</outputs>
+	<tests>
+		<test>
+			<param name="biom" value="references/09-normalisation.biom"/>
+			<output name="summary_file" value="references/10-clustersStat.html" compare="diff" lines_diff="0"/>
+		</test>
+	</tests>
+	<help>
+
+@HELP_LOGO@
+
+.. class:: infomark page-header h2
+
+What it does
+
+FROGS Cluster stats computes several metrics and generates an HTML report describing the clusters according to their abundances, samples, etc.
+
+
+.. class:: infomark page-header h2
+
+Input/output
+
+.. class:: h3
+
+Input
+
+**Abundance file**:
+
+The abundance of each cluster in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+
+The FROGS tools that work on clusters, as well as other metagenomic workflows, produce files in BIOM format.
+
+.. class:: h3
+
+Output
+
+**Report file** (report.html):
+
+ *Cluster distribution* : describes the size distribution of all clusters using boxplots and tables
+
+ .. image:: FROGS_cluster_stats_clusterDistrib1.png
+    :height: 1180
+    :width: 900
+
+ *Sequence distribution* : describes the distribution of sequences among clusters
+
+ .. image:: FROGS_cluster_stats_seq_dist.png
+
+ *Sample distribution* : describes the distribution of clusters among samples and gives a `hierarchical clustering &lt;http://en.wikipedia.org/wiki/Hierarchical_clustering&gt;`_ of the samples based on their abundance profiles (distance method = `braycurtis &lt;http://fr.wikipedia.org/wiki/Distance_de_Bray-Curtis&gt;`_, linkage method = average)
+
+ .. image:: FROGS_cluster_stats_sample_dist1.png
+    :height: 400
+    :width: 700
+
+ .. image:: FROGS_cluster_stats_sample_dist2.png
+    :height: 350
+    :width: 610
+
+.. class:: infomark page-header h2
+
+Advice
+
+This is a very useful tool to follow the evolution of the OTUs. Do not hesitate to run it after each FROGS step, starting from the clustering step.
+
+
+
+@HELP_CONTACT@
+
+	</help>
+	<expand macro="citations" />
+</tool>
--- a/clustering.xml	Tue Mar 07 07:28:20 2023 +0000
+++ b/clustering.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="FROGS_clustering" name="FROGS Clustering swarm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+<tool id="FROGS_clustering" name="FROGS_2 Clustering swarm" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
     <description>Single-linkage clustering on sequences</description>
     <macros>
         <import>macros.xml</import>
--- a/clusters_stat.xml	Tue Mar 07 07:28:20 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="FROGS_clusters_stat" name="FROGS Clusters stat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-	<description>Process some metrics on clusters</description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-    <command detect_errors="exit_code">
-	clusters_stat.py
-		--input-biom '$biom'
-		--output-file '$summary_file'
-	</command>
-	<inputs>
-		<param format="biom1" name="biom" type="data" label="Abundance file" help="Clusters abundance (format: BIOM)"/>
-	</inputs>
-	<outputs>
-		<data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="summary.html"/>
-	</outputs>
-	<tests>
-		<test>
-			<param name="biom" value="references/09-normalisation.biom"/>
-			<output name="summary_file" value="references/10-clustersStat.html" compare="diff" lines_diff="0"/>
-		</test>
-	</tests>
-	<help>
-
-@HELP_LOGO@
-
-.. class:: infomark page-header h2
-
-What it does
-
-FROGS Clusters stat computes several metrics and generates a HTML file describing clusters based on abundances, samples, ...
-
-
-.. class:: infomark page-header h2
-
-Input/output
-
-.. class:: h3
-
-Input
-
-**Abundance file**:
-
-The abundance of each cluster in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
-
-The FROGS's tools working on clusters and others metagenomic workflows produce files in BIOM format.
-
-.. class:: h3
-
-Output
-
-**Report file** (report.html):
-
- *Cluster distribution* : describes the sizes distribution of all clusters thanks to boxplots and tables
-
- .. image:: FROGS_cluster_stat_clusterDistrib1.png
-    :height: 1180
-    :width: 900
-
- *Sequence distribution* : describes the sequences distribution among clusters
-
- .. image:: FROGS_cluster_stat_seq_dist.png
-
- *Sample distribution* : describes clusters distribution among samples and gives an `hierarchical clustering &lt;http://en.wikipedia.org/wiki/Hierarchical_clustering&gt;`_ on samples abundance profile (distance method = `braycurtis &lt;http://fr.wikipedia.org/wiki/Distance_de_Bray-Curtis&gt;`_, linkage method = average)
-
- .. image:: FROGS_cluster_stat_sample_dist1.png
-    :height: 400
-    :width: 700
-
- .. image:: FROGS_cluster_stat_sample_dist2.png
-    :height: 350
-    :width: 610
-
-.. class:: infomark page-header h2
-
-Advices
-
-This is a very usefull tool to see the evolution of the OTUs. Do not hesitate to run this tool after each FROGS step beginning at the clustering step.
-
-
-
-@HELP_CONTACT@
-
-	</help>
-	<expand macro="citations" />
-</tool>
--- a/demultiplex.xml	Tue Mar 07 07:28:20 2023 +0000
+++ b/demultiplex.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="FROGS_demultiplex" name="FROGS Demultiplex reads" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+<tool id="FROGS_demultiplex" name="FROGS_0 Demultiplex reads" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
 	<description>Attribute reads to samples in function of inner barcode</description>
     <macros>
         <import>macros.xml</import>
--- a/macros.xml	Tue Mar 07 07:28:20 2023 +0000
+++ b/macros.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">4.1.0</token>
+    <token name="@TOOL_VERSION@">4.0.1</token>
     <token name="@VERSION_SUFFIX@">1</token>
     
     <xml name="requirements">
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml_new	Thu Mar 09 07:59:57 2023 +0000
@@ -0,0 +1,116 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">4.1.0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+    
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">frogs</requirement>
+            <yield />
+        </requirements>
+    </xml>
+
+    <token name="@CPUS@">--nb-cpus \${GALAXY_SLOTS:-1}</token>
+    <xml name="requirements_phyloseq">
+        <expand macro="requirements">
+            <requirement type="package" version="4.1.2">r-base</requirement>
+            <!-- <requirement type="package" version="2.11.4">pandoc</requirement> -->
+            <requirement type="package" version="4.1">r-essentials</requirement>
+            <requirement type="package" version="1.38.0">bioconductor-phyloseq</requirement>
+            <yield />
+        </expand>
+    </xml>
+
+    <xml name="requirements_frogsfunc">
+        <expand macro="requirements">
+            <requirement type="package" version="2.4.1">picrust2</requirement>
+            <yield />
+        </expand>
+    </xml>
+    
+    <xml name="taxonomic_ranks"> <!-- Shared text parameter: ordered, space-separated list of taxonomic rank names. -->
+        <param argument="--taxonomic-ranks" type="text" optional="false" value="Domain Phylum Class Order Family Genus Species"
+            label="Taxonomic ranks" help="The ordered taxonomic rank levels stored in BIOM. Each rank is separated by one space">
+            <sanitizer invalid_char=""> <!-- Any character other than a letter or a space is dropped from the value. -->
+                <valid initial="string.letters">
+                    <add value=" " />
+                </valid>
+            </sanitizer>
+            <validator type="regex">^[A-Za-z ]+$</validator> <!-- Anchored on both ends: the regex validator only matches from the start of the value, so an unanchored pattern accepts any input that merely begins with a letter or a space. -->
+        </param>
+    </xml>
+
+    <xml name="sanitizer_validator"> <!-- Shared sanitizer + validator pair for free-text parameters; both must accept exactly the same character set. -->
+        <sanitizer invalid_char=""> <!-- Any character outside the whitelist below is dropped from the value. -->
+            <valid initial="string.letters,string.digits">
+                <add value="+" />
+                <add value="*" />
+                <add value="-" />
+                <add value="=" />
+                <add value=" " />
+                <add value="_" />
+                <add value="," />
+                <add value="." />
+            </valid>
+        </sanitizer>
+        <validator type="regex">^[A-Za-z0-9+*= _,.-]+$</validator> <!-- The hyphen is last in the class so it is a literal (it used to form the range from * to =, which also admitted / : ; and the less-than sign); anchored so the whole value must match, not only its beginning. -->
+    </xml>
+    
+    <token name="@HELP_LOGO@">
+.. image:: FROGS_logo.png
+   :height: 144
+   :width: 110
+
+    </token>
+    <token name="@HELP_CONTACT@">
+
+----
+
+**Contact**
+
+Contacts: frogs-support@inrae.fr
+
+Repositories: https://github.com/geraldinepascal/FROGS, https://github.com/geraldinepascal/FROGS-wrappers
+
+Website: http://frogs.toulouse.inrae.fr/
+
+Depending on which kind of amplicon you are working on, please cite one of the two FROGS publications:
+
+    </token>
+    <xml name="citations">
+        <citations>
+        <citation type="bibtex">
+            @article{10.1093/bioinformatics/btx791,
+                author = {Escudié, Frédéric and Auer, Lucas and Bernard, Maria and Mariadassou, Mahendra and Cauquil, Laurent and Vidal, Katia and Maman, Sarah and Hernandez-Raquet, Guillermina and Combes, Sylvie and Pascal, Géraldine},
+                title = "{FROGS: Find, Rapidly, OTUs with Galaxy Solution}",
+                journal = {Bioinformatics},
+                volume = {34},
+                number = {8},
+                pages = {1287-1294},
+                year = {2018},
+                month = {04},
+                abstract = "{Metagenomics leads to major advances in microbial ecology and biologists need user friendly tools to analyze their data on their own.This Galaxy-supported pipeline, called FROGS, is designed to analyze large sets of amplicon sequences and produce abundance tables of Operational Taxonomic Units (OTUs) and their taxonomic affiliation. The clustering uses Swarm. The chimera removal uses VSEARCH, combined with original cross-sample validation. The taxonomic affiliation returns an innovative multi-affiliation output to highlight databases conflicts and uncertainties. Statistical results and numerous graphical illustrations are produced along the way to monitor the pipeline. FROGS was tested for the detection and quantification of OTUs on real and in silico datasets and proved to be rapid, robust and highly sensitive. It compares favorably with the widespread mothur, UPARSE and QIIME.Source code and instructions for installation: https://github.com/geraldinepascal/FROGS.git. A companion website: http://frogs.toulouse.inra.fr.Supplementary data are available at Bioinformatics online.}",
+                issn = {1367-4803},
+                doi = {10.1093/bioinformatics/btx791},
+                url = {https://doi.org/10.1093/bioinformatics/btx791},
+                eprint = {https://academic.oup.com/bioinformatics/article-pdf/34/8/1287/25120140/btx791\_supplementary\_file.pdf},
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{10.1093/bib/bbab318,
+                author = {Bernard, Maria and Rué, Olivier and Mariadassou, Mahendra and Pascal, Géraldine},
+                title = "{FROGS: a powerful tool to analyse the diversity of fungi with special management of internal transcribed spacers}",
+                journal = {Briefings in Bioinformatics},
+                year = {2021},
+                month = {08},
+                abstract = "{Fungi are present in all environments. They fulfil important ecological functions and play a crucial role in the food industry. Their accurate characterization is thus indispensable, particularly through metabarcoding. The most frequently used markers to monitor fungi are ITSs. These markers are the best documented in public databases but have one main weakness: polymerase chain reaction amplification may produce non-overlapping reads in a significant fraction of the fungi. When these reads are filtered out, traditional metabarcoding pipelines lose part of the information and consequently produce biased pictures of the composition and structure of the environment under study. We developed a solution that enables processing of the entire set of reads including both overlapping and non-overlapping, thus providing a more accurate picture of fungal communities. Our comparative tests using simulated and real data demonstrated the effectiveness of our solution, which can be used by both experts and non-specialists on a command line or through the Galaxy-based web interface.}",
+                issn = {1477-4054},
+                doi = {10.1093/bib/bbab318},
+                url = {https://doi.org/10.1093/bib/bbab318},
+                note = {bbab318},
+                eprint = {https://academic.oup.com/bib/advance-article-pdf/doi/10.1093/bib/bbab318/39805849/bbab318.pdf},
+            }
+        </citation>
+        </citations>
+    </xml>
+</macros>
--- a/otu_filters.xml	Tue Mar 07 07:28:20 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,297 +0,0 @@
-<?xml version="1.0"?>
-<!--
-# Copyright (C) 2015 INRA
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
--->
-<tool id="FROGS_OTU_filters" name="FROGS OTU Filters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
-	<description>Filters OTUs on several criteria.</description>
-
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-
-    <expand macro="requirements">
-        <requirement type="package" version="2.10">blast</requirement>
-    </expand>
-
-        <stdio>
-                <exit_code range="1:" />
-                <exit_code range=":-1" />
-        </stdio>
-
-	<command>
-
-			otu_filters.py
-			--nb-cpus \${GALAXY_SLOTS:-1}
-			--input-biom '$input_biom'
-			--input-fasta '$input_fasta'
-			--output-fasta '$output_fasta'
-			--output-biom '$output_biom'
-			--excluded '$output_excluded'
-			--summary '$output_summary'
-
-			#if $choice_prevalence_method.prevalence_method == "all"
-				#if $choice_prevalence_method.min_sample_presence
-					--min-sample-presence $choice_prevalence_method.min_sample_presence
-				#end if
-			#else if $choice_prevalence_method.prevalence_method == "replicate"
-				#if $choice_prevalence_method.min_replicate_presence
-					--min-replicate-presence $choice_prevalence_method.min_replicate_presence
-					--replicate_file $choice_prevalence_method.replicate_file
-				#end if
-			#end if
-
-			#if $choice_abundance_unit.abundance_unit_type == "count"
-				#if $choice_abundance_unit.min_abundance_count
-					--min-abundance $choice_abundance_unit.min_abundance_count
-				#end if
-			#end if
-
-			#if $choice_abundance_unit.abundance_unit_type == "proportion"
-				#if $choice_abundance_unit.min_abundance_proportion
-					--min-abundance $choice_abundance_unit.min_abundance_proportion
-				#end if
-			#end if
-
-			#if $nb_biggest_otu
-				--nb-biggest-otu $nb_biggest_otu
-			#end if
-
-
-			#if $contaminantSource.which_contaminantSource == "history"
-				##build index on the fly
-				--contaminant '${contaminantSource.ownContaminantFile}'
-			#else if $contaminantSource.which_contaminantSource == "server"
-				##use precomputed indexes
-				--contaminant '${contaminantSource.contaminants_db.fields.path}'
-			#end if
-
-	</command>
-	<inputs>
-		<!-- Files -->
-		<param format="fasta" name="input_fasta" type="data" label="Sequence file" help="The sequence file to filter (format: FASTA)" />
-		<param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)" />
-
-		<conditional name="choice_prevalence_method">
-			<param name="prevalence_method" type="select" label="Minimum prevalence method">
-				<option value="all"> all samples </option>
-				<option value="replicate"> replicate identification </option>
-			</param>
-			<when value="all">
-				<param name="min_sample_presence" type="integer" optional="true" label="Minimum prevalence" size="5" help="Fill the field only if you want this treatment. Keep OTU if it is present in at least this number of samples.">
-					<validator type="in_range" min="2" message="To be effective this threshold need to be higher than 1" />
-				</param>
-			</when>
-			<when value="replicate">
-				<param name="replicate_file" type="data" format="tsv" optional="True" label="File of replicated sample names" help="Replicate file to link each sample to its group (cf. Help section)." />
-				<param name="min_replicate_presence" type="float" min="0" max="1" optional="true" label="Minimum prevalence" size="5" help="Fill the field only if you want this treatment. Keep OTU present in at least this proportion of replicates in at least one group (must be a proportion between 0 and 1).">
-				</param>
-			</when>
-		</conditional>
-
-		<conditional name="choice_abundance_unit">
-			<param name="abundance_unit_type" type="select" label="Minimum OTU abundancy as proportion or count. We recommend to use a proportion of 0.00005.">
-				<option value="proportion">as proportion</option>
-				<option value="count">as count</option>
-			</param>
-			<when value="proportion">
-				<param name="min_abundance_proportion" argument="--min_abundance" type="float" optional="true" label="Minimum proportion of sequences abundancy to keep OTU" help='Fill the field only if you want this treatment. Example: 0.00005, recommended by Bokulich et al 2013, to keep OTU with at least 0.005% of all sequences'>
-					<validator type="in_range" min="0" exclude_min="true" max="1" exclude_max="true" message="Abundance proportion threshold need to be strictly greater than 0 and less than 1, otherwise you will not remove anything or remove everything." />
-				</param>
-			</when>
-			<when value="count">
-				<param name="min_abundance_count" argument="--min_abundance" type="integer" optional="true" label="Minimum number of sequences to keep OTU" help='Fill the field only if you want this treatment. Ex: 2 to keep OTU with at least 2 sequences, so remove single singleton'>
-					<validator type="in_range" min="2" message='To be effective this threshold need to be higher than 1. 2 means that you will remove OTU with 1 and only 1 sequence over all samples'/>
-				</param>
-			</when>
-		</conditional>
-
-		<param argument="--nb-biggest-otu" type="integer" optional="true" label="N biggest OTUs" help="Fill the fields only if you want this treatment. Keep the N biggest OTU" />
-
-		<conditional name="contaminantSource">
-			<param name="which_contaminantSource" argument="--contaminant" type="select" label="Search for contaminant OTU." help="Either you use your own contaminant fasta file or you select one among available ones.">
-				<option value="no">No contaminant filter</option>
-				<option value="server">Use contaminant FASTA file from the server</option>
-				<option value="history">Use contaminant FASTA file from the history</option>
-			</param>
-			<when value="no"/>
-			<when value="server">
-				<param name="contaminants_db" type="select" label="Contaminant databank" help="For example the phiX databank (the phiX is a control added in Illumina sequencing technologies).">
-					<options from_data_table="frogs_contaminant_db"></options>
-					<validator type="no_options" message="A built-in database is not available" />
-				</param>
-			</when>
-			<when value="history">
-				<param name="ownContaminantFile" type="data" format="fasta" label="Select a contaminante reference from history" />
-			</when>
-		</conditional>
-	</inputs>
-	<outputs>
-		<data format="biom1" name="output_biom" label="${tool.name}: otuFilter_abundance.biom" from_work_dir="otuFilter_abundance.biom" />
-		<data format="fasta" name="output_fasta" label="${tool.name}: otuFilter_sequences.fasta" from_work_dir="otuFilter_sequences.fasta" />
-		<data format="tsv" name="output_excluded" label="${tool.name}: excluded.tsv" from_work_dir="excluded.tsv" />
-		<data format="html" name="output_summary" label="${tool.name}: report.html" from_work_dir="report.html" />
-	</outputs>
-	<tests>
-		<test>
-	        <param name="input_fasta" value="references/03-chimera.fasta" />
-            <param name="input_biom" value="references/03-chimera.biom" />
-            <param name="min_sample_presence" value="3" />
-		    <conditional name="choice_abundance_unit">
-		    	<param name="abundance_unit_type" value="proportion" />
-				<param name="min_abundance_proportion" value="0.00005" />
-		    </conditional>
-            <conditional name="contaminantSource">
-            	<param name="which_contaminantSource" value="server"/>
-				<param name="contaminant_db" value="phiX_test" />
-			</conditional>
-			<conditional name="choice_prevalence_method">
-				<param name="prevalence_method" value="replicate" />
-				<param name="replicate_file" value="input/replicate_file.tsv" />
-				<param name="min_replicate_presence" value="0.5"/>
-			</conditional>
-	 	    <output name="output_fasta" file="references/04-filters.fasta" compare="diff" lines_diff="0" />
-	 	    <output name="output_biom" file="references/04-filters.biom" compare="sim_size" delta="0" />
-		    <output name="output_excluded" file="references/04-filters.excluded" compare="diff" lines_diff="0" />
-		    <output name="output_summary" file="references/04-filters.html" compare="diff" lines_diff="0" />
-	    </test>
-	</tests>
-	<help>
-
-@HELP_LOGO@
-
-.. class:: infomark page-header h2
-
-What it does
-
-Filter the OTUs in an abundance table according to:
-
--The abundance and the occurence of OTUs: presence in samples, OTU size and maximum number of OTUs.
-
--Contamination: from the list of proposition (ex : phiX, a control added in Illumina sequencing technologies) or from your history (ex : a fasta file containing a list of contaminant of your choice).
-
-
-
-.. class:: infomark page-header h2
-
-Inputs/outputs
-
-
-.. class:: h3
-
-Inputs
-
-**Sequence file**:
-
-The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
-
-**Abundance file**:
-
-The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
-
-**Contaminant fasta file** (optional):
-
-A sequence fasta file containing the reference sequence of known contaminant
-
-**File of replicated sample names** (optional):
-
-If you selected " replicate identification " for the minimum prevalence method, this file is needed to identify the group to which the samples belong.
-
-If not all samples are present in the "File of replicated sample names", they will not be affected by this filter step and will be kept in the abundance table.
-
-The file must consist of only 2 columns, separated by a tabulation. The first column contains the exact names of the samples (exactly those contained in the biom file) and the second column contains the name of the group to which they belong.
-
-Please note that group names must not contain accents, spaces or special characters.
-
-.. class:: h3
-
-Outputs
-
-**Sequence file** (sequences.fasta):
-
- The sequences after filtering (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
-
-**Abundance file** (abundance.biom):
-
- The abundance after filtering (format `BIOM &lt;http://biom-format.org/&gt;`_).
-
-**Excluded file** (excluded.txt):
-
- The list of the OTUs deleted by filters (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
-
-**Report file** (report.html):
-
- The filters and the number of removed sequences (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
-
-
-
-.. class:: infomark page-header h2
-
-How it works?
-
-
-
-
-The OTUs kept are the ones that satisfy into the BIOM input file the thresholds specified by the user.
-
-The BIOM abundance table and the fasta file are written again according to the OTUs kept.
-
-The OTUs discarded are listed in the excluded file.
-
-.. csv-table::
-   :header: "Steps", "description"
-   :widths: 5, 150
-   :class: table table-striped
-
-   "1", "Except the filter to select the n most abundant OTUs, all the selected filters are run independently. For each filters a list of the OTUs to remove is generated. Concerning contaminant research, OTUs are added to the previous list if it aligns on a contaminant reference sequence with 80% of identity and 80% of coverage"
-   "2", "All the OTUs tagged to remove by at least one filter are removed."
-   "3", "If the filter to select the N most abundant OTUs is filled it is applied."
-
-
-
-.. class:: infomark page-header h2
-
-Advices
-
-Please check that the input fasta file and the input BIOM file correspond to the same OTUs.
-
-Examples for the filters on abundance and occurence of the OTUs :
-
--To keep the filters that are present in 5 samples, fill the **Minimum prevalence method** field with **all samples** option, with "5".
-
--To keep the filters that are present in half of the replicates, fill the **Minimum prevalence method** field with **replicate identification** option, with a minimum prevalence of "0.5".
-
-.. image:: FROGS_otu_filter_replicates_file.png
-
-In this example, if we want to keep the OTUs/ASVs that are present in at least 50% of the samples, we set the threshold at 0.5. The process will therefore keep the OTUs/ASVs present in at least 
-
- - 2 "rich" samples
- - 3 "richAB" samples,
- - 1 "lowAB" sample
- - 1 "april21" sample
-
-and all OTUs/ASVs in sample9 since it is the only representative of the "low" condition.
-
--To display the 20 biggest OTUs, fill the corresponding field with "20".
-
--To filter on abundance, we advise you to specify 0.005% i.e. 0.00005. It seems to be the optimal threshold (`Bokulich *et al*, 2013 &lt;http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html&gt;`_ ).
-
-
-@HELP_CONTACT@
-
-	</help>
-	<expand macro="citations" />
-</tool>
-
--- a/preprocess.xml	Tue Mar 07 07:28:20 2023 +0000
+++ b/preprocess.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="FROGS_preprocess" name="FROGS Pre-process" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+<tool id="FROGS_preprocess" name="FROGS_1 Pre-process" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
     <description>merging, denoising and dereplication</description>
      <macros>
         <import>macros.xml</import>
--- a/remove_chimera.xml	Tue Mar 07 07:28:20 2023 +0000
+++ b/remove_chimera.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="FROGS_remove_chimera" name="FROGS Remove chimera" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+<tool id="FROGS_remove_chimera" name="FROGS_3 Remove chimera" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
     <description>Remove PCR chimera in each sample</description>
     <macros>
         <import>macros.xml</import>
Binary file static/images/FROGS_affiliation_stat_alignment.png has changed
Binary file static/images/FROGS_affiliation_stat_bootstrap.png has changed
Binary file static/images/FROGS_affiliation_stat_rarefaction.png has changed
Binary file static/images/FROGS_affiliation_stat_sunburst.png has changed
Binary file static/images/FROGS_affiliation_stat_taxonomies.png has changed
Binary file static/images/FROGS_affiliation_stats_alignment.png has changed
Binary file static/images/FROGS_affiliation_stats_bootstrap.png has changed
Binary file static/images/FROGS_affiliation_stats_rarefaction.png has changed
Binary file static/images/FROGS_affiliation_stats_sunburst.png has changed
Binary file static/images/FROGS_affiliation_stats_taxonomies.png has changed
Binary file static/images/FROGS_cluster_stat_clusterDistrib1.png has changed
Binary file static/images/FROGS_cluster_stat_sample_dist1.png has changed
Binary file static/images/FROGS_cluster_stat_sample_dist2.png has changed
Binary file static/images/FROGS_cluster_stat_seq_dist.png has changed
Binary file static/images/FROGS_cluster_stats_clusterDistrib1.png has changed
Binary file static/images/FROGS_cluster_stats_sample_dist1.png has changed
Binary file static/images/FROGS_cluster_stats_sample_dist2.png has changed
Binary file static/images/FROGS_cluster_stats_seq_dist.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/taxonomic_affiliation.xml	Thu Mar 09 07:59:57 2023 +0000
@@ -0,0 +1,166 @@
+<tool id="FROGS_taxonomic_affiliation" name="FROGS_5 Taxonomic affiliation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+    <description>Taxonomic affiliation of each OTU's seed by RDPtools and BLAST</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements">
+        <requirement type="package" version="2.10">blast</requirement>
+        <requirement type="package" version="6.6.0">emboss</requirement>
+    </expand>
+    <command detect_errors="exit_code"><![CDATA[
+    #set $reference_filename = str( $ref_file.fields.path )
+    export GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-2048}/1024)) &&
+    taxonomic_affiliation.py
+        --reference '${reference_filename}'
+        --taxonomy-ranks $taxonomic_ranks
+        --input-biom '$biom_abundance'
+        --input-fasta '$fasta_sequences'
+        --output-biom '$biom_affiliation'
+        --summary '$summary'
+        @CPUS@
+        --java-mem \$GALAXY_MEMORY_GB
+        #if $rdp
+          --rdp
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Database Choice -->
+        <param name="ref_file" type="select" label="Using reference database" help="Select reference from the list">
+            <options from_data_table="frogs_db"/>
+            <validator type="no_options" message="A built-in database is not available"/>
+        </param>
+        <param argument="--rdp" type="boolean" label="Also perform RDP assignation?" help="Taxonomy affiliation will be perform thanks to Blast. This option allows to perform it also with RDP classifier tool (default No)"/>
+        <expand macro="taxonomic_ranks"/>
+        <!-- Files -->
+        <param format="fasta" name="fasta_sequences" type="data" label="Sequence file" help="The sequences to affiliated (format: FASTA)"/>
+        <param format="biom1" name="biom_abundance" type="data" label="Abundance file" help="The abundance file (format: BIOM)"/>
+    </inputs>
+    <outputs>
+        <data format="biom1" name="biom_affiliation" label="${tool.name}: affiliation_abundance.biom" from_work_dir="affiliation.biom"/>
+        <data format="html" name="summary" label="${tool.name}: report.html" from_work_dir="report.html"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="ref_file" value="ITS1_test"/>
+            <param name="fasta_sequences" value="references/04-filters.fasta"/>
+            <param name="biom_abundance" value="references/04-filters.biom"/>
+            <param name="rdp" value="true"/>
+            <output name="biom_affiliation" file="references/06-affiliation.biom" compare="sim_size" delta="5"/>
+            <output name="summary" file="references/06-affiliation.html" compare="diff" lines_diff="0"/>
+        </test>
+    </tests>
+    <help>
+
+@HELP_LOGO@
+
+.. class:: infomark page-header h2
+
+What it does
+
+This tool adds taxonomic affiliations to the abundance file.
+
+
+.. class:: infomark page-header h2
+
+Inputs/outputs
+
+.. class:: h3
+
+Inputs
+
+**Sequence file**:
+
+The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+
+**Abundance file**:
+
+The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+
+.. class:: h3
+
+Outputs
+
+**Abundance file** (affiliation_abundance.biom):
+
+ The abundance file with affiliation (format `BIOM &lt;http://biom-format.org/&gt;`_).
+
+**Report file** (report.html):
+
+ This file presents the number of sequences affiliated by blast, and the number of multi-affiliations (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
+
+ .. image:: FROGS_affiliation_summary.png
+   :height: 975
+   :width: 867
+
+.. class:: infomark page-header h2
+
+Reference database
+
+All the databases we format (on demand) for RDPClassifier and NCBI Blast+ are inventoried here: http://genoweb.toulouse.inrae.fr/frogs_databanks/assignation/readme.txt
+
+.. class:: infomark page-header h2
+
+How does it work?
+
+.. csv-table::
+   :header: "Steps", "Description"
+   :widths: 5, 150
+   :class: table table-striped
+
+   "1", "`RDPClassifier &lt;http://rdp.cme.msu.edu/classifier/classifier.jsp&gt;`_ may be used with database to associate to each OTU a taxonomy and a bootstrap (example: *Bacteria;(1.0);Firmicutes;(1.0);Clostridia;(1.0);Clostridiales;(1.0);Clostridiaceae 1;(1.0);Clostridium sensu stricto;(1.0);*)."
+   "2", "`blastn+ &lt;https://blast.ncbi.nlm.nih.gov/Blast.cgi&gt;`_ or `needleall &lt;http://emboss.sourceforge.net/apps/release/6.6/emboss/apps/needleall.html&gt;`_ is used to find alignments between each OTU and the database. Only the best hits with the same score are reported. blastn+ is used for merged read pairs, and needleall is used for artificially combined sequences. For each alignment returned, several metrics are computed: identity percentage, coverage percentage, and alignment length."
+   "3", "For each OTU with several blastn+/needleall alignment results, a consensus is determined at each taxonomic level. If all the taxa at a taxonomic rank are identical, the taxon name is reported; otherwise *Multi-affiliation* is reported. For example, if you have an OTU with two equivalent hits, associated to *Bacteria;Proteobacteria;Gamma Proteobacteria;Enterobacteriales*, and *Bacteria;Proteobacteria;Beta Proteobacteria;Methylophilales*, the consensus will be *Bacteria;Proteobacteria;Multi-affiliation;Multi-affiliation*."
+
+.. class:: infomark page-header h2
+
+Alignment metrics details on identity percentage calculation
+
+**- Problem with classical %id computation method**
+
+* **Case 1: a sequencing of overlapping sequences i.e. 16S V3-V4 amplicon MiSeq sequencing**
+
+.. image:: FROGS_affiliation_overlapped_percent_id.png
+    :height: 325
+    :width: 807
+
+* **Case 2 : a sequencing of non-overlapping sequences: case of ITS1 amplicon MiSeq sequencing**
+
+.. image:: FROGS_affiliation_combined_percent_id.png
+    :height: 310
+    :width: 887
+
+**- Finally, how is the percentage identity computed?**
+
+With the classical method of %id calculation, filtering on %id will systematically remove “FROGS combined” OTUs. So, we propose to replace the classical %id by a %id computed on the sequenced bases only.
+
+.. image:: FROGS_affiliation_percent_id_formula.png
+    :height: 36
+    :width: 637
+
+For the previous use cases we obtain:
+
+* Case 1: 16S V3V4 overlapped sequence
+
+  % sequenced bases identity = 400 matches / 400 bp = 100%
+
+* Case 2: very large ITS1 “FROGS combined” shorter than the real sequence
+
+  % sequenced bases identity = (250 + 250 ) / (600 - 100) = 100%
+
+This calculation returns 100% identity on sequenced bases for “FROGS combined” sequences that are shorter or longer than the real sequence when sequencing is perfect, and a lower identity percentage when a small overlapping repeat is kept in the “FROGS combined” sequence.
+
+.. class:: infomark page-header h2
+
+
+Advice
+
+This tool can take a long time to run. It is recommended to filter your OTU abundance and sequence files before running this tool (see **FROGS OTU Filters**).
+
+As you can see, the affiliation of each OTU is not human-readable in the output abundance file. We provide a tool to convert this BIOM file into a tabulated file: see the **FROGS BIOM to TSV** tool.
+
+
+@HELP_CONTACT@
+
+    </help>
+    <expand macro="citations"/>
+</tool>