Mercurial > repos > oinizan > frogs

--- a/affiliation_filters.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/affiliation_filters.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -1,10 +1,9 @@
 <tool id="FROGS_affiliation_filters" name="FROGS Affiliation Filters" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-    <description>Filters OTUs on several affiliation criteria</description>
+    <description>Filters ASVs on several affiliation criteria</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements"/>
-    <command detect_errors="exit_code">
+    <expand macro="requirements"/>   <command detect_errors="exit_code">
 		affiliation_filters.py
 			--input-biom '$input_biom'
 			--input-fasta '$input_fasta'
@@ -43,11 +42,17 @@
 			#end if

 			#set $sep = ' '
-            #if $blast.taxon_ignored
+            #if $blast.keep_or_ignore.taxa_choice == "ignore"
             	--ignore-blast-taxa
-            	#for $current in $blast.taxon_ignored
-                	$sep'${current.ignore_blast_taxa}'
-            	#end for
+                #for $current in $blast.keep_or_ignore.taxon_ignored
+                    $sep'${current.ignore_blast_taxa}'
+                #end for
+            #end if
+            #if $blast.keep_or_ignore.taxa_choice == "keep"
+            	--keep-blast-taxa
+                #for $current in $blast.keep_or_ignore.taxon_kept
+                    $sep'${current.keep_blast_taxa}'
+                #end for
             #end if

 	</command>
@@ -57,30 +62,57 @@
         <param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)"/>
         <expand macro="taxonomic_ranks"/>
         <!-- It is used for optional output fasta -->
-        <param name="mode" type="select" label="Filtering mode" multiple="false" help="Do you want to delete OTU or hide affiliations?" optional="false" display="radio">
+        <param name="mode" type="select" label="Filtering mode" multiple="false" help="Do you want to delete ASV or hide affiliations?" optional="false" display="radio">
             <option value="hide" selected="true">Hidding mode</option>
             <option value="delete">Deleting mode</option>
         </param>
         <section name="blast" title="Filter on Blast affiliations" expanded="true">
             <param argument="--max-blast-evalue" type="float" min="0.0" max="1.0" optional="true" label="Maximum e-value" help="Fill the field only if you want this treatment"/>
-            <param argument="--min-blast-identity" type="float" min="0.0" max="1.0" optional="true" label="Minimum identity" help="Fill the field only if you want this treatment"/>
-            <param argument="--min-blast-coverage" type="float" min="0.0" max="1.0" optional="true" label="Minimum coverage" help="Fill the field only if you want this treatment"/>
+            <param argument="--min-blast-identity" type="float" min="0.0" max="100" optional="true" label="Minimum identity" help="Fill the field only if you want this treatment"/>
+            <param argument="--min-blast-coverage" type="float" min="0.0" max="100" optional="true" label="Minimum coverage" help="Fill the field only if you want this treatment"/>
             <param argument="--min-blast-length" type="integer" min="0" optional="true" label="Minimum alignment length" help="Fill the field only if you want this treatment"/>
-            <repeat name="taxon_ignored" title="Filter blast affiliations including these taxon / word">
-                <param argument="--ignore-blast-taxa" type="text" optional="true" label="Full or partial taxon name" help="Example: &quot;unknown species&quot; or &quot;subsp.&quot;">
-                    <sanitizer invalid_char="">
-                        <valid initial="string.letters,string.digits">
-                            <add value="/" />
-                            <add value="+" />
-                            <add value="-" />
-                            <add value="=" />
-                            <add value=" " />
-                            <add value="_" />
-                        </valid>
-                    </sanitizer>
-                    <validator type="regex">[A-Za-z0-9 =-_/+]+</validator>
+            <conditional name="keep_or_ignore">
+                <param name="taxa_choice" type="select" label="Keyword filters of blast affiliation" help="Do you want to keep or ignore blast affiliations according a keyword ?" display="radio" >
+                    <option value="" selected="true">No filter</option>
+                    <option value="ignore">Ignore taxa</option>
+            	    <option value="keep" >Keep taxa</option>
                 </param>
-            </repeat>
+                <when value=""/>
+                <when value="ignore">
+                    <repeat name="taxon_ignored" title="Remove blast affiliations including these taxon / word">
+                        <param argument="--ignore-blast-taxa" type="text" optional="true" label="Full or partial taxon name" help="Example: &quot;unknown species&quot; or &quot;subsp.&quot;">
+                            <sanitizer invalid_char=""> <!-- characters outside the whitelist below are replaced by the empty string -->
+                                <valid initial="string.letters,string.digits">
+                                    <add value="/" />
+                                    <add value="+" />
+                                    <add value="-" />
+                                    <add value="=" />
+                                    <add value=" " />
+                                    <add value="_" />
+                                </valid>
+                            </sanitizer>
+                            <validator type="regex">[A-Za-z0-9 =_/+-]+</validator> <!-- hyphen kept last so it is a literal, not an '=' to '_' range; mirrors the sanitizer whitelist -->
+                        </param>
+                    </repeat>
+                </when>
+                <when value="keep">
+                    <repeat name="taxon_kept" title="Keep blast affiliations including these taxon / word">
+                        <param argument="--keep-blast-taxa" type="text" optional="true" label="Full or partial taxon name" help="Example: &quot;unknown species&quot; or &quot;subsp.&quot;">
+                            <sanitizer invalid_char=""> <!-- characters outside the whitelist below are replaced by the empty string -->
+                                <valid initial="string.letters,string.digits">
+                                    <add value="/" />
+                                    <add value="+" />
+                                    <add value="-" />
+                                    <add value="=" />
+                                    <add value=" " />
+                                    <add value="_" />
+                                </valid>
+                            </sanitizer>
+                            <validator type="regex">[A-Za-z0-9 =_/+-]+</validator> <!-- hyphen kept last so it is a literal, not an '=' to '_' range; mirrors the sanitizer whitelist -->
+                        </param>
+                    </repeat>
+                </when>
+            </conditional>
         </section>
         <section name="rdp" title="Filter on RDP affiliations" expanded="false">
             <param name="rdp_rank" type="text" optional="true" value="" label="Taxonomical rank on which to apply bootstrap filter" help="One of the available taxonomical rank name. Ex: Species">
@@ -99,8 +131,8 @@
         <data format="fasta" name="output_fasta" label="${tool.name}: affiFilter_sequences.fasta" from_work_dir="affiFilter_sequences.fasta">
             <filter> mode == 'delete'</filter>
         </data>
-        <data format="tsv" name="output_impacted" label="${tool.name}: impacted_OTU.tsv" from_work_dir="impacted.tsv"/>
-        <data format="tsv" name="output_multihit" label="${tool.name}: impacted_OTU.multi-affiliations.tsv" from_work_dir="impacted.multihit.tsv"/>
+        <data format="tsv" name="output_impacted" label="${tool.name}: impacted_clusters.tsv" from_work_dir="impacted.tsv"/>
+        <data format="tsv" name="output_multihit" label="${tool.name}: impacted_clusters.multi-affiliations.tsv" from_work_dir="impacted.multihit.tsv"/>
         <data format="html" name="output_summary" label="${tool.name}: report.html" from_work_dir="report.html"/>
     </outputs>
     <tests>
@@ -112,17 +144,17 @@
             <param name="min_rdp_bootstrap" value="0.8"/>
             <param name="min_blast_length" value="150"/>
             <param name="max_blast_evalue" value="1e-150"/>
-            <param name="min_blast_identity" value="1"/>
-            <param name="min_blast_coverage" value="1"/>
+            <param name="min_blast_identity" value="100"/>
+            <param name="min_blast_coverage" value="100"/>
             <repeat name="taxon_ignored">
                 <param name="ignore_blast_taxa" value="g__Sarcodon"/>
             </repeat>
             <repeat name="taxon_ignored">
                 <param name="ignore_blast_taxa" value="s__Trichoderma"/>
             </repeat>
-            <output name="output_impacted" file="references/07-impacted_OTU_masked.tsv" compare="diff" lines_diff="0"/>
+            <output name="output_impacted" file="references/07-impacted_clusters_masked.tsv" compare="diff" lines_diff="0"/>
             <output name="output_summary" file="references/07-affiliation_masked.html" compare="diff" lines_diff="0"/>
-            <output name="output_multihit" file="references/07-impacted_OTU_masked_multihit.tsv" compare="diff" lines_diff="0"/>
+            <output name="output_multihit" file="references/07-impacted_clusters_masked_multihit.tsv" compare="diff" lines_diff="0"/>
             <output name="output_biom" file="references/07-affiliation_masked.biom" compare="sim_size" delta="0"/>
         </test>
         <test>
@@ -133,17 +165,17 @@
             <param name="min_rdp_bootstrap" value="0.8"/>
             <param name="min_blast_length" value="150"/>
             <param name="max_blast_evalue" value="1e-150"/>
-            <param name="min_blast_identity" value="1"/>
-            <param name="min_blast_coverage" value="1"/>
+            <param name="min_blast_identity" value="100"/>
+            <param name="min_blast_coverage" value="100"/>
             <repeat name="taxon_ignored">
                 <param name="ignore_blast_taxa" value="g__Sarcodon"/>
             </repeat>
             <repeat name="taxon_ignored">
                 <param name="ignore_blast_taxa" value="s__Trichoderma"/>
             </repeat>
-            <output name="output_impacted" file="references/07-impacted_OTU_deleted.tsv" compare="diff" lines_diff="0"/>
+            <output name="output_impacted" file="references/07-impacted_clusters_deleted.tsv" compare="diff" lines_diff="0"/>
             <output name="output_summary" file="references/07-affiliation_deleted.html" compare="diff" lines_diff="0"/>
-            <output name="output_multihit" file="references/07-impacted_OTU_deleted_multihit.tsv" compare="diff" lines_diff="0"/>
+            <output name="output_multihit" file="references/07-impacted_clusters_deleted_multihit.tsv" compare="diff" lines_diff="0"/>
             <output name="output_biom" file="references/07-affiliation_deleted.biom" compare="sim_size" delta="0"/>
             <output name="output_fasta" file="references/07-affiliation_deleted.fasta" compare="diff" lines_diff="0"/>
         </test>
@@ -156,11 +188,11 @@

 What it does

-This tool removes OTUs or hides taxonomical metadata according to one or more criteria:
+This tool removes or keeps ASVs or hides taxonomical metadata according to one or more criteria:

  - for RDP taxonomy : a minimal bootstrap threshold at a specific rank

- - for blast taxonomy : a minimal identity rate, coverage rate, or alignment length, or a maximal evalue, or the absence of full or partial taxon name.
+ - for blast taxonomy : a minimal identity rate, coverage rate, or alignment length, or a maximal evalue, or the absence/presence of a full or partial taxon name.


 .. class:: infomark page-header h2
@@ -174,11 +206,11 @@

 **Abundance file**:

-The abundance of each OTU with taxonomical metadata (format `BIOM &lt;http://biom-format.org/&gt;`_).
+The abundance of each ASV with taxonomical metadata (format `BIOM &lt;http://biom-format.org/&gt;`_).

 **Sequence file** (optional):

-The OTUs seed sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_). This parameter is mandatory in case of using deleting mode.
+Only in deleting mode: The ASVs sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).


 .. class:: h3
@@ -195,15 +227,15 @@

 **Impacted abundance tabular file** (impacted.tsv):

- The list of the OTUs deleted/hidden or with updated blast affiliation (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+ The list of the ASVs deleted/hidden or with updated blast affiliation (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).

 **Impacted multihit tabular file** (impacted.multihit.tsv):

- The list of blast affiliations for multi-affiliated impacted OTU (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+ The list of blast affiliations for multi-affiliated impacted ASV (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).

 **Report file** (report.html):

- A report HTML of impacted OTUs, lost taxonomies and details by samples (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
+ A report HTML of impacted ASVs, lost taxonomies and details by samples (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).

 .. class:: infomark page-header h2

@@ -215,9 +247,26 @@
    :class: table table-striped

    "1", "Compare all affiliations (RDP and all blast hits) to each filtering criteria."
-   "2", "Update concensus blast taxonomy if at least one blast criteria has an impact. When only part of the blast affiliations are removed, the concensus blast taxonomy is updated, and if it changes, the OTU is considered as *Modified*."
-   "3", "**In deleting mode**, OTU is removed either if the RDP bootstrap criteria is not respected or if none of the blast affiliation respect all the criteria . **In hidding mode**, RDP affiliation is hidden if the RDP bootstrap criteria is not respected, and independantly, blast affiliations are hidden if none of the blast affiliation respect all the criteria."
-   "4", "Write valid OTU into the filtered.biom file with potential blast affiliation updated and impacting criteria annotations.All impacted/modified OTU with their original metadata are reported in the impacted.tsv file with its associated impacted.multihit.tsv file. Impacting status (OTU_deleted / Affiliation_masked / Blast_taxonomy_changed) and the list of impacting criteria are also reported in the impacted.tsv file."
+   "2", "Update consensus blast taxonomy if at least one blast criterion has an impact. When only part of the blast affiliations are removed, the consensus blast taxonomy is updated, and if it changes, the ASV is considered as *Modified*."
+   "3", "**In deleting mode**, ASV is removed/kept either if the RDP bootstrap criterion is not respected or if none of the blast affiliations respects all the criteria. **In hidding mode**, RDP affiliation is hidden if the RDP bootstrap criterion is not respected, and independently, blast affiliations are hidden if none of the blast affiliations respects all the criteria."
+   "4", "Write valid ASV into the filtered.biom file with potential blast affiliation updated and impacting criteria annotations. All impacted/modified ASV with their original metadata are reported in the impacted.tsv file with its associated impacted.multihit.tsv file. Impacting status (ASV_deleted / Affiliation_masked / Blast_taxonomy_changed) and the list of impacting criteria are also reported in the impacted.tsv file."
+
+
+.. class:: infomark page-header h2
+
+Ignore or Keep ?
+
+You can choose to keep or to ignore ASVs (in both deleting and hidding modes) according to a keyword.
+
+.. image:: FROGS_affiliation_filter_ignore.png
+
+Here, you will **hide or delete** all ASVs with "Firmicutes" in their taxonomic affiliation.
+
+.. image:: FROGS_affiliation_filter_keep.png
+
+Here, you will **keep** all ASVs with "Firmicutes" in their taxonomic affiliation.
+
+Please note that the keyword search is case sensitive.


 .. class:: infomark page-header h2
@@ -238,11 +287,11 @@

 The RDP taxonomy does not respect the RDP boostrap threshold, but all blast affiliation criteria are respected.

-In deleting mode, Cluster_1 will be removed.
-In the report.html, it will be considered as "Removed", and if the RDP taxonomy and/or the blast taxonomy is/are not kept thanks to an other OTU, the RDP/blast taxonomy(ies) will be considered as lost.
+In **deleting mode**, Cluster_1 will be removed.
+In the report.html, it will be considered as "Removed", and if the RDP taxonomy and/or the blast taxonomy is/are not kept thanks to another ASV, the RDP/blast taxonomy(ies) will be considered as lost.

-In hidding mode, RDP taxonomy will be removed, blast taxonomy will remain unchanged and Cluster_1 will be kept.
-In the report.html, it will be considered as "Hidden", and if the RDP taxonomy is not kept thanks to an other OTU, the RDP taxonomy will be considered as lost.
+In **hidding mode**, RDP taxonomy will be removed, blast taxonomy will remain unchanged and Cluster_1 will be kept.
+In the report.html, it will be considered as "Hidden", and if the RDP taxonomy is not kept thanks to another ASV, the RDP taxonomy will be considered as lost.

 - Cluster 2:

@@ -250,11 +299,11 @@

 The RDP taxonomy respects the RDP boostrap threshold, but none of the blast affiliations respect all the blast criteria.

-In deleting mode, Cluster_1 will be removed.
-In the report.html, it will be considered as "Removed", and if the two blast taxonomies are not kept thanks to others OTUs, they will be considered as lost. In the same way, if there is no other OTU, affiliated to Sulfurimonas genus but with an ambiguous species, 1 Multi-affilaition will be considered as lost in the report.html. And idem for the RDP taxonomy.
+In **deleting mode**, Cluster_2 will be removed.
+In the report.html, it will be considered as "Removed", and if the two blast taxonomies are not kept thanks to other ASVs, they will be considered as lost. In the same way, if there is no other ASV affiliated to the Sulfurimonas genus but with an ambiguous species, 1 multi-affiliation will be considered as lost in the report.html. The same applies to the RDP taxonomy.

-In hidding mode, RDP taxonomy will be remain unchanged, blast taxonomy will be removed and Cluster_2 will be kept.
-In the report.html, it will be considered as "Hidden", and if the two taxonomies are not kept thanks to others OTUs, they will be considered as lost. In the same way, if there is no other OTU, affiliated to Sulfurimonas genus but with an ambiguous species, 1 Multi-affilaition will be considered as lost in the report.html.
+In **hidding mode**, RDP taxonomy will remain unchanged, blast taxonomy will be removed and Cluster_2 will be kept.
+In the report.html, it will be considered as "Hidden", and if the two taxonomies are not kept thanks to other ASVs, they will be considered as lost. In the same way, if there is no other ASV affiliated to the Sulfurimonas genus but with an ambiguous species, 1 multi-affiliation will be considered as lost in the report.html.

 - Cluster 3:

@@ -262,9 +311,9 @@

 The RDP taxonomy respects the RDP boostrap threshold, and one of the two blast affiliations respect all the blast criteria.

-In both deleting and hidding mode, Cluster_1 will be kept.
+In **both deleting and hidding** modes, Cluster_3 will be kept.
 In the report.html, it will be considered as "Modified", as the RDP taxonomy will remain unchanged and the blast taxonomy will be updated.
-If no other OTU is affiliated to the "unknown species" of the Fusobacterium genu, this species will be considered as lost. In the same way if no other OTU is affiliated to Fusobacterium genus but with an ambiguous species, 1 Multi-affilaition at the Species level will be considered as lost in the report.html.
+If no other ASV is affiliated to the "unknown species" of the Fusobacterium genus, this species will be considered as lost. In the same way, if no other ASV is affiliated to the Fusobacterium genus but with an ambiguous species, 1 multi-affiliation at the Species level will be considered as lost in the report.html.

 @HELP_CONTACT@
--- a/affiliation_postprocess.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/affiliation_postprocess.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -1,12 +1,12 @@
 <tool id="FROGS_affiliation_postprocess" name="FROGS Affiliation postprocess" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-    <description>Aggregates OTUs based on alignment metrics</description>
+    <description>Aggregates ASVs based on alignment metrics</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
     <command detect_errors="exit_code">
         affiliation_postprocess.py
-		#if $is_HVL.HVL_amplicon
+		#if $is_HVL.HVL_amplicon == "Yes"
             --reference '$is_HVL.reference.fields.path'
         #end if
 		--identity $identity
@@ -22,8 +22,11 @@
         <param format="fasta" name="input_fasta" type="data" label="Sequence file" help="The sequence file to filter (format: FASTA)."/>
         <param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)"/>
         <conditional name="is_HVL">
-            <param name="HVL_amplicon" type="boolean" label="Is this an amplicon hyper variable in length?" help="Multi-affiliation tag may be resolved by selecting the shortest amplicon reference. For this you need the reference fasta file of your targetted amplicon."/>
-            <when value="true">
+            <param name="HVL_amplicon" type="select" display="radio" label="Is this an amplicon hyper variable in length?" help="Multi-affiliation tag may be resolved by selecting the shortest amplicon reference. For this, you need the reference fasta file of your target amplicon.">
+                <option value="No">No</option>
+                <option value="Yes" >Yes</option>
+            </param>
+            <when value="Yes">
                 <param argument="--reference" type="select" label="Using reference database" help="Select reference from the list">
                     <options from_data_table="frogs_HVL_db"/>
                     <validator type="no_options" message="A built-in database is not available"/>
@@ -31,10 +34,10 @@
     					<column name="value" index="1"/-->
                 </param>
             </when>
-            <when value="false"/>
+            <when value="No"/>
         </conditional>
-        <param argument="--identity" type="float" min="0.0" max="100.0" value="99.0" label="Minimum identity for aggregation" help="OTUs will be aggregated if they share the same taxonomy with at least X% identity"/>
-        <param argument="--coverage" type="float" min="0.0" max="100.0" value="99.0" label="Minimum coverage for aggregation" help="OTUs will be aggregated if they share the same taxonomy with at least X% alignment coverage"/>
+        <param argument="--identity" type="float" min="0.0" max="100.0" value="99.0" label="Minimum identity for aggregation" help="ASVs will be aggregated if they share the same taxonomy with at least X% identity"/>
+        <param argument="--coverage" type="float" min="0.0" max="100.0" value="99.0" label="Minimum coverage for aggregation" help="ASVs will be aggregated if they share the same taxonomy with at least X% alignment coverage"/>
     </inputs>
     <outputs>
         <data format="biom1" name="biom_out" label="${tool.name}: affiliation_abundance.biom" from_work_dir="affiliation.biom"/>
@@ -62,7 +65,7 @@

 What it does

-This tool resolves multi-hit ambiguities if exact amplicon length are available and aggregrated OTUs sharing same taxonomy based on alignment metrics thresholds
+This tool resolves multi-affiliation ambiguities if exact amplicon lengths are available, and aggregates ASVs sharing the same taxonomy based on alignment metric thresholds.


 .. class:: infomark page-header h2
@@ -75,11 +78,11 @@

 **Abundance file**:

-The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_) with taxonomic affiliations metadata.
+The abundance of each ASV in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_) with taxonomic affiliations metadata.

 **Sequence file**:

-The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_) of each OTU seed.
+The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_) of each ASV sequence.

 **Reference file** (optionnal):

@@ -92,24 +95,25 @@

 **Abundance file**:

- The abundance file of OTUs and aggregated OTUs, with their affiliation (format `BIOM &lt;http://biom-format.org/&gt;`_) and with potentially less ambiguities.
+ The abundance file of ASVs and aggregated ASVs, with their affiliation (format `BIOM &lt;http://biom-format.org/&gt;`_) and with potentially less ambiguities.

 **Sequence file**:

-The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_) of each aggregated OTU seed.
+The sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_) of each aggregated ASV sequence.

 **Composition file**:

-The aggregation composition file (format text) describing the composition of each resulting OTU.
+The aggregation composition file (format text) describing the composition of each resulting ASV.

 .. class:: infomark page-header h2

 How it works ?

-If a reference fasta file is provided, for each OTU with multiaffiliation, among the different possible affiliations, we only keep the affiliation of the sequence with the shortest length. The aim is to resolve ambiguities due to potential inclusive sequences such as ITS.
+This step aggregates ASVs that share the same taxonomy, inferred from alignment metrics.
+The process starts with the most abundant ASV. If an ASV shares at least one affiliation with another ASV with at least I% of identity and C% of alignment coverage, the ASVs are aggregated together: the different affiliations are merged, the blast consensus taxonomy is updated and abundance counts are summed. The sequence of the most abundant ASV is kept.

-Second step is the OTUs aggregation that shares the same taxonomy inferred on alignment metrics.
-The process starts with the most abundant OTU. If an OTU shares at least one affiliation with another OTU with at least I% of identity and C% of alignment coverage, the OTUs are aggregated together : the different affiliations, are merged, blast concensus taxonomy is updated and abundance counts are summed. The seed of the most abundant OTU is kept.
+If a reference fasta file is provided, for each ASV with multi-affiliation, among the different possible affiliations, we only keep the affiliation of the sequence with the shortest length. This is useful to resolve ambiguities due to potential inclusive sequences such as ITS.
+This step also makes it possible to group ASVs from different copies of the same marker gene, thus having the same affiliation.


 @HELP_CONTACT@
--- a/affiliation_stats.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/affiliation_stats.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="FROGS_affiliation_stats" name="FROGS Affiliation stats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+<tool id="FROGS_affiliation_stats" name="FROGS_6_Affiliation_Stat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
 	<description>Process some metrics on taxonomies</description>
     <macros>
         <import>macros.xml</import>
@@ -61,7 +61,7 @@
 				<param argument="--identity-tag" type="text" label="Identity tag" help="The metadata tag used in BIOM file to store the alignment identity">
 					<expand macro="sanitizer_validator"/>
 				</param>
-				<param argument="--coverage-tag" type="text" label="Coverage tag" help="The metadata tag used in BIOM file to store the alignment OTUs coverage">
+				<param argument="--coverage-tag" type="text" label="Coverage tag" help="The metadata tag used in BIOM file to store the alignment ASVs coverage">
 					<expand macro="sanitizer_validator"/>
 				</param>
 			</when>
@@ -94,7 +94,7 @@

 What it does

-FROGS Affiliation stats computes several metrics and generates a HTML file describing OTUs based on their taxonomies and eventually the quality of the affiliations.
+FROGS_6_Affiliation_Stat computes several metrics and generates an HTML file describing ASVs based on their taxonomies and, where available, the quality of the affiliations.


 .. class:: infomark page-header h2
@@ -107,9 +107,8 @@

 **Abundance file**:

-The abundance and affiliation of each OTUs (format `BIOM &lt;http://biom-format.org/&gt;`_). This file can be produced by FROGS Affiliation OTU.
+The abundance and affiliation of each ASV (format `BIOM &lt;http://biom-format.org/&gt;`_). This file can be produced by FROGS_5_taxonomic_affiliation tool.

-The FROGS's tools working on clusters and others metagenomic workflows produce files in BIOM format.

 .. class:: h3

@@ -117,7 +116,7 @@

 **Report file** (report.html):

- OTUs taxonomies and affiliations metrics (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_):
+ ASVs taxonomies and affiliation metrics (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_):

   *-Taxonomy distribution*: displays the distribution of each taxon and the rarefaction for each taxonomic rank and for each sample

@@ -136,14 +135,14 @@
   -Bootstrap distribution: displays for affiliation methods with bootstrap the bootstrap on each taxonomic rank

   .. image:: FROGS_affiliation_stats_bootstrap.png
-    :height: 380
-    :width: 867
+    :height: 568
+    :width: 1050

   -Alignment distribution: displays for affiliation methods with alignment the distribution of identity/coverage

   .. image:: FROGS_affiliation_stats_alignment.png
-    :height: 380
-    :width: 859
+    :height: 570
+    :width: 731


 @HELP_CONTACT@
--- a/biom_to_stdBiom.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/biom_to_stdBiom.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -62,7 +62,7 @@

 How it works ?

-FROGS BIOM to std BIOM extracts the blast alignment details in a second file, write a BIOM usable in every tools using BIOM and defined the consensus taxonomy provided by blast as main taxonomy.
+**FROGS BIOM to std BIOM** extracts the blast alignment details into a second file, writes a BIOM usable in every tool using BIOM, and defines the consensus taxonomy provided by blast as the main taxonomy.
--- a/biom_to_tsv.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/biom_to_tsv.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -11,7 +11,7 @@
 		    --input-fasta '$sequence_file'
 		#end if
 		--output-tsv '$tsv_file'
-		#if $extract_multi_align
+		#if $extract_multi_align == "yes"
 		    --output-multi-affi '$multi_affi_file'
 	    #end if
 	</command>
@@ -20,12 +20,15 @@
 		<param format="biom1" name="biom_file" type="data" label="Abundance file" help="The BIOM file to convert (format: BIOM)" optional="false" />
 		<param format="fasta" name="sequence_file" type="data" label="Sequences file (optional)" help="The sequences file (format: fasta). If you use this option the sequences will be add in TSV." optional="true" />
 		<!-- Parameters -->
-		<param name="extract_multi_align" type="boolean" label="Extract multi-alignments" help="If you have used FROGS affiliation on your data, you can extract information about multiple alignements in a second TSV." checked="true"/>
+		<param name="extract_multi_align" type="select" display="radio"  label="Extract multi-alignments" help="If you have used FROGS_5_taxonomic_affiliation on your data, you can extract information about multiple alignements in a second TSV.">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no" >No</option>
+		</param>
 	</inputs>
 	<outputs>
 		<data format="tsv" name="tsv_file" label="${tool.name}: abundance.tsv" from_work_dir="abundance.tsv"/>
 		<data format="tsv" name="multi_affi_file" label="${tool.name}: multi-affiliations.tsv" from_work_dir="multi_hits.tsv" >
-			<filter>extract_multi_align</filter>
+			<filter>extract_multi_align == "yes"</filter>
 		</data>
 	</outputs>
 	<tests>
@@ -65,27 +68,27 @@

 **Abundance file**:

- The abundance of each cluster in each sample and theirs metadata (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+ The abundance of each cluster in each sample and its metadata (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).

  If you add the sequences file, this information is added for each cluster.

-**Multiple affiliation file**:
+**Multi-affiliation file**:

- If you have used *FROGS affiliation* on your data, each OTU can have several affiliations: several alignments with same score on reference database. The multiple affiliation file contains details on these possibles affiliations (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+ If you have used *FROGS_5_taxonomic_affiliation* on your data, each ASV can have several affiliations: several alignments with the same score on the reference database. The multi-affiliation file contains details on these possible affiliations (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).


 .. class:: infomark page-header h2

 How it works ?

-FROGS Biom to Tsv will search if any metadata are available, and the OTU sequence if fasta file is precised. Then it will extract the OTU name, sum the sample abundance, and extract the detailed abundance for each sample. Finally, it will write all these fields separated by tabulation in the TSV file.
+**FROGS_Biom_to_TSV** checks whether any metadata are available, and retrieves the ASV sequences if a Sequences file (FASTA format) is provided. Then it extracts the ASV name, sums the sample abundances, and extracts the detailed abundance for each sample. Finally, it writes all these fields separated by tabulations in the TSV file.


 .. class:: infomark page-header h2

 Advices

-This output tsv file is easily readable in any spreadsheet software. Be aware that these software have a number of line limit (1 048 576 for Excel and LibreOffice calc ate least). If you have more OTU, use **FROGS Filters** to extract for example the most abundant OTU before converting your BIOM abundance table in TSV file.
+This output TSV file is easily readable in any spreadsheet software. Be aware that such software limits the number of lines (1 048 576, at least for Excel and LibreOffice Calc). If you have more ASVs, use **FROGS Cluster filters** to extract, for example, the most abundant ASVs before converting your BIOM abundance table into a TSV file.


 @HELP_CONTACT@
--- a/cluster_filters.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/cluster_filters.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -66,7 +66,7 @@
 			#end if

 			#if $nb_biggest_clusters
-				--nb-biggest-clusters $nb_biggest_otu
+				--nb-biggest-clusters $nb_biggest_clusters
 			#end if


@@ -95,7 +95,7 @@
 				</param>
 			</when>
 			<when value="replicate">
-				<param name="replicate_file" type="data" format="tsv" optional="True" label="File of replicated sample names" help="Replicate file to link each sample to its group (cf. Help section)." />
+				<param name="replicate_file" type="data" format="tabular,tsv" optional="True" label="File of replicated sample names" help="Replicate file to link each sample to its group (cf. Help section)." />
 				<param name="min_replicate_presence" type="float" min="0" max="1" optional="true" label="Minimum prevalence" size="5" help="Fill the field only if you want this treatment. Keep cluster present in at least this proportion of replicates in at least one group (must be a proportion between 0 and 1).">
 				</param>
 			</when>
@@ -107,13 +107,13 @@
 				<option value="count">as count</option>
 			</param>
 			<when value="proportion">
-				<param name="min_abundance_proportion" argument="--min_abundance" type="float" optional="true" label="Minimum proportion of sequences abundancy to keep cluster" help='Fill the field only if you want this treatment. Example: 0.00005, recommended by Bokulich et al 2013, to keep cluster with at least 0.005% of all sequences'>
+				<param name="min_abundance_proportion" argument="--min_abundance" type="float" optional="true" label="Minimum proportion of sequences abundancy to keep cluster" help="Fill the field only if you want this treatment. Example: 0.00005, recommended by Bokulich et al 2013, to keep cluster with at least 0.005% of all sequences">
 					<validator type="in_range" min="0" exclude_min="true" max="1" exclude_max="true" message="Abundance proportion threshold need to be strictly greater than 0 and less than 1, otherwise you will not remove anything or remove everything." />
 				</param>
 			</when>
 			<when value="count">
-				<param name="min_abundance_count" argument="--min_abundance" type="integer" optional="true" label="Minimum number of sequences to keep cluster" help='Fill the field only if you want this treatment. Ex: 2 to keep cluster with at least 2 sequences, so remove single singleton'>
-					<validator type="in_range" min="2" message='To be effective this threshold need to be higher than 1. 2 means that you will remove cluster with 1 and only 1 sequence over all samples'/>
+				<param name="min_abundance_count" argument="--min_abundance" type="integer" optional="true" label="Minimum number of sequences to keep cluster" help="Fill the field only if you want this treatment. Ex: 2 to keep cluster with at least 2 sequences, so remove single singleton">
+					<validator type="in_range" min="2" message="To be effective this threshold need to be higher than 1. 2 means that you will remove cluster with 1 and only 1 sequence over all samples"/>
 				</param>
 			</when>
 		</conditional>
@@ -178,9 +178,9 @@

 Filter the clusters in an abundance table according to:

--The abundance and the occurence of clusters: presence in samples, cluster size and maximum number of clusters.
+	(i) The abundance and the occurrence of clusters: presence in samples, cluster size and maximum number of clusters.

--Contamination: from the list of proposition (ex : phiX, a control added in Illumina sequencing technologies) or from your history (ex : a fasta file containing a list of contaminant of your choice).
+	(ii) Contamination: from the list of proposition (ex : phiX, a control added in Illumina sequencing technologies) or from your history (ex : a fasta file containing a list of contaminant of your choice).


@@ -199,7 +199,7 @@

 **Abundance file**:

-The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+The abundance of each cluster in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).

 **Contaminant fasta file** (optional):

@@ -229,7 +229,7 @@

 **Excluded file** (excluded.txt):

- The list of the OTUs deleted by filters (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+ The list of the clusters deleted by filters (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).

 **Report file** (report.html):

@@ -241,23 +241,20 @@

 How it works?

-
-
+The clusters kept are the ones that, in the BIOM input file, satisfy the thresholds specified by the user.

-The OTUs kept are the ones that satisfy into the BIOM input file the thresholds specified by the user.
+The BIOM abundance table and the fasta file are written again according to the clusters kept.

-The BIOM abundance table and the fasta file are written again according to the OTUs kept.
-
-The OTUs discarded are listed in the excluded file.
+The clusters discarded are listed in the excluded file.

 .. csv-table::
    :header: "Steps", "description"
    :widths: 5, 150
    :class: table table-striped

-   "1", "Except the filter to select the n most abundant OTUs, all the selected filters are run independently. For each filters a list of the OTUs to remove is generated. Concerning contaminant research, OTUs are added to the previous list if it aligns on a contaminant reference sequence with 80% of identity and 80% of coverage"
-   "2", "All the OTUs tagged to remove by at least one filter are removed."
-   "3", "If the filter to select the N most abundant OTUs is filled it is applied."
+   "1", "Except the filter to select the N most abundant clusters, all the selected filters are run independently. For each filter, a list of the clusters to remove is generated. Concerning contaminant research, a cluster is added to the previous list if it aligns on a contaminant reference sequence with 80% of identity and 80% of coverage"
+   "2", "All the clusters tagged to remove by at least one filter are removed."
+   "3", "If the filter to select the N most abundant clusters is filled it is applied."


@@ -265,28 +262,28 @@

 Advices

-Please check that the input fasta file and the input BIOM file correspond to the same OTUs.
+Please check that the input fasta file and the input BIOM file correspond to the same clusters.

-Examples for the filters on abundance and occurence of the OTUs :
+Examples for the filters on abundance and occurrence of the clusters:

--To keep the filters that are present in 5 samples, fill the **Minimum prevalence method** field with **all samples** option, with "5".
+-To keep the clusters that are present in 5 samples, fill the **Minimum prevalence method** field with **all samples** option, with "5".

--To keep the filters that are present in half of the replicates, fill the **Minimum prevalence method** field with **replicate identification** option, with a minimum prevalence of "0.5".
+-To keep the clusters that are present in half of the replicates, fill the **Minimum prevalence method** field with **replicate identification** option, with a minimum prevalence of "0.5".

 .. image:: FROGS_otu_filter_replicates_file.png

-In this example, if we want to keep the OTUs/ASVs that are present in at least 50% of the samples, we set the threshold at 0.5. The process will therefore keep the OTUs/ASVs present in at least
+In this example, if we want to keep the clusters that are present in at least 50% of the samples, we set the threshold at 0.5. The process will therefore keep the clusters present in at least

  - 2 "rich" samples
  - 3 "richAB" samples,
  - 1 "lowAB" sample
  - 1 "april21" sample

-and all OTUs/ASVs in sample9 since it is the only representative of the "low" condition.
+and all clusters in sample9 since it is the only representative of the "low" condition.

--To display the 20 biggest OTUs, fill the corresponding field with "20".
+-To display the 20 **biggest** clusters, fill the corresponding field with "20".

--To filter on abundance, we advise you to specify 0.005% i.e. 0.00005. It seems to be the optimal threshold (`Bokulich *et al*, 2013 &lt;http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html&gt;`_ ).
+-To filter on **abundance** of cluster i.e. the cluster size, we advise you to specify 0.005% i.e. 0.00005. It seems to be the optimal threshold (`Bokulich *et al*, 2013 &lt;http://www.nature.com/nmeth/journal/v10/n1/abs/nmeth.2276.html&gt;`_ ).


 @HELP_CONTACT@
--- a/cluster_stats.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/cluster_stats.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="FROGS_cluster_stats" name="FROGS Cluster stats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+<tool id="FROGS_cluster_stats" name="FROGS_Cluster_Stat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
 	<description>Process some metrics on clusters</description>
     <macros>
         <import>macros.xml</import>
@@ -76,7 +76,7 @@

 Advices

-This is a very usefull tool to see the evolution of the OTUs. Do not hesitate to run this tool after each FROGS step beginning at the clustering step.
+This is a very useful tool to see the evolution of the clusters. Do not hesitate to run this tool after each FROGS step, beginning at the clustering step.
--- a/clustering.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/clustering.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -16,11 +16,15 @@
         --output-compo '$swarms_composition'
         #if $FROGS_guidelines.guidelines_version == "3.2"
             --distance $FROGS_guidelines.distance
-            $FROGS_guidelines.fastidious
+            #if $FROGS_guidelines.fastidious == "true"
+                --fastidious
+            #end if
         #end if
         #if $FROGS_guidelines.guidelines_version == "3.1"
             --distance $FROGS_guidelines.distance
-            $FROGS_guidelines.denoising
+            #if $FROGS_guidelines.denoising == "true"
+                --denoising
+            #end if
         #end if
     </command>
     <inputs>
@@ -34,12 +38,18 @@
                 <option value="3.1">First guidelines until 3.1</option>
             </param>
             <when value="3.2">
-                <param argument="--distance" type="integer" label="Aggregation distance clustering" help="Maximum number of differences between sequences in each aggregation swarm step. (recommended d=1)" value="1" min="1" max="15" optional="false" />
-                <param argument="--fastidious" type="boolean" checked="true" truevalue="--fastidious" falsevalue="" label="Refine OTU clustering" help="Clustering will be performed with the swarm --fastidious option. It is recommended and only usable in association with a distance of 1 (default and recommended: Yes)" />
+                <param argument="--distance" type="integer" label="Aggregation distance clustering" help="Maximum number of differences between sequences in each aggregation Swarm step. (recommended d=1)" value="1" min="1" max="15" optional="false" />
+                <param argument="--fastidious" type="select" label="Refine clustering" help="Clustering will be performed with the Swarm --fastidious option. It is recommended and only usable in association with a distance of 1 (default and recommended: Yes)" display="radio" >
+                    <option value="true">Yes, refine clustering with --fastidious swarm option</option>
+                    <option value="false">No, perform clustering without refinment</option>
+                </param>
             </when>
             <when value="3.1">
-                <param argument="--distance" type="integer" value="3" min="1" max="15" optional="false" label="Aggregation distance clustering" help="Maximum number of differences between sequences in each aggregation swarm step" />
-                <param argument="--denoising" type="boolean" checked="true" truevalue="--denoising" falsevalue="" label="Efficient denoising ? (equals to a first clustering step with d=1)" help="Clustering will be perform in two steps, first with distance = 1 and then with an aggregation distance of next input parameter (default : Yes)" />
+                <param argument="--distance" type="integer" value="3" min="1" max="15" optional="false" label="Aggregation distance clustering" help="Maximum number of differences between sequences in each aggregation Swarm step" />
+                <param argument="--denoising" type="select" label="Efficient denoising ? (equals to a first clustering step with d=1)" help="Clustering will be perform in two steps, first with distance = 1 and then with an aggregation distance of next input parameter (default : Yes)" display="radio" >
+                    <option value="true">Yes, perform a first clustering with d=1</option>
+                    <option value="false">No, perform clustering in one step</option>
+                </param>
             </when>
         </conditional>
     </inputs>
@@ -146,11 +156,11 @@
    "1", "Sorting the reads by their abundance", "Sorting the reads by their abundance", "Sorting the reads by their abundance"
    "2", "/", "/", "Clusters the reads (`Swarm &lt;https://github.com/torognes/swarm&gt;`_) with an agglomeration distance of 1"
    "3", "/", "/", "Sorting the pre-clusters sequences by their abundance"
-   "4", "Clusters the reads (`Swarm &lt;https://github.com/torognes/swarm&gt;`_) with the distance you specified", "Clusters the reads (`Swarm &lt;https://github.com/torognes/swarm&gt;`_) with the distance you specified and the (`--fastidious option of swarm &lt;https://github.com/torognes/swarm/blob/master/README.md#refine-swarm-otus&gt;`_)", "Clusters the pre-clusters sequences (`Swarm &lt;https://github.com/torognes/swarm&gt;`_) with the distance you specified"
+   "4", "Clusters the reads (`Swarm &lt;https://github.com/torognes/swarm&gt;`_) with the distance you specified", "Clusters the reads (`Swarm &lt;https://github.com/torognes/swarm&gt;`_) with the distance you specified and the (`--fastidious option of Swarm &lt;https://github.com/torognes/swarm/blob/master/README.md#refine-swarm-otus&gt;`_)", "Clusters the pre-clusters sequences (`Swarm &lt;https://github.com/torognes/swarm&gt;`_) with the distance you specified"

 **Swarm focus**

-Swarm uses an iterative growth process and the use of sequence abundance values to delineate OTUs.
+Swarm uses an iterative growth process and the use of sequence abundance values to delineate clusters.

 .. image:: FROGS_cluster_swarm.png
    :height: 223
@@ -158,9 +168,9 @@

 In each growth step, the sequence of the previous step is used to find the others sequences with a number of differences inferior or equal to the "Aggregation distance".

-After agregation Swarm refines the clusters by looking at the abundancies along the connections. Theoritically the abundances must decrease when you are going away from the seed (which is often the most abundant sequence). If this abundance raises again, it means that two different clusters are connected by some poorly abundant sequences, so swarm cut the connection.
+After aggregation, Swarm refines the clusters by looking at the abundances along the connections. Theoretically, the abundances must decrease when you are going away from the seed (which is often the most abundant sequence). If the abundance rises again, it means that two different clusters are connected by some poorly abundant sequences, so Swarm cuts the connection.

-On the other hand, the fastidious option of swarm allows to aggregate small and rare clusters into bigger one if they share sequence with at most 2*d distance. In this cases, d is restricted to 1 so, cluster distance will be 2. (image extracted from `Swarm github &lt;https://github.com/torognes/swarm&gt;`_)
+On the other hand, the fastidious option of Swarm allows small and rare clusters to be aggregated into bigger ones if they share sequences with at most 2*d distance. In this case, d is restricted to 1, so the cluster distance will be 2. (image extracted from `Swarm github &lt;https://github.com/torognes/swarm&gt;`_)

 .. image:: FROGS_cluster_fastidious.png
    :height: 319
@@ -173,11 +183,11 @@

 The fastidious strategy is recommended since FROGS 3.2

-The fastidious option is recommended with an aggregating distance of 1. "It will reduce the number of small OTUs while maintaining a high clustering resolution, by postulating the existence of an intermediate amplicon sequences"
+The fastidious option is recommended with an aggregating distance of 1. "It will reduce the number of small clusters while maintaining a high clustering resolution, by postulating the existence of an intermediate amplicon sequences"

 The denoising strategy was recommended until FROGS 3.1

-The denoising step allows to build very fine clusters with minimal differences. In this case, the number of differences between sequences of each crowns is equal to 1. This first clustering is extremly quick. After the denoising, a second swarm is run with an aggregation distance >1 as you have configured, between seeds from this first clustering. We recommended a distance of 3.
+The denoising step allows very fine clusters with minimal differences to be built. In this case, the number of differences between sequences of each crown is equal to 1. This first clustering is extremely quick. After the denoising, a second Swarm is run with an aggregation distance >1 as you have configured, between seeds from this first clustering. We recommend a distance of 3.

 To have some metrics on your clusters, you can use the tool **FROGS Clusters Stat**.
--- a/deseq2_preprocess.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/deseq2_preprocess.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -8,23 +8,44 @@
         <requirement type="package" version="1.6.6">r-optparse</requirement>
     </expand>
     <command detect_errors="exit_code">
-    deseq2_preprocess.py --data '$data'
+    deseq2_preprocess.py
+        --analysis '$analysis_type.analysis'
+        #if $analysis_type.analysis == 'ASV'
+            --data '$data'
+            --out-Rdata '$dds_asv'
+        #else:
+            --input-functions '$input_functions'
+            --samplefile '$samplefile'
+            --out-Rdata '$dds_function'
+        #end if
         #if $multiple.seconde == 'true'
             --var '$var1${multiple.mod}${multiple.var2}'
         #else
             --var '$var1'
         #end if
-        --out-Rdata '$dds'
+
 	</command>
     <inputs>
 		<!-- Files -->
-        <param format="rdata" name="data" type="data" label="Phyloseq object " help="This is the result of FROGSSTAT Phyloseq Import Data with normalise option set to NO (DESeq2 is more powerful on unnormalised counts) (format RData)"/>
-        <!-- Parameters -->
-	    <param name="var1" type="text" label="Experimental variable" help="The factor suspected to have an effect on OTU abundances. Ex: Treatment, etc.">
+		<conditional name="analysis_type">
+			<param name="analysis" type="select" label="Type of analysis" help="Type of data to perform the differential analysis. ASV: DESeq2 is run on the ASV abundance table. FUNCTION: DESeq2 is run on predicted function abundance table from FROGSFUNC_2_function tool." display='radio'>
+            	<option value="ASV" selected="true">ASV</option>
+            	<option value="FUNCTION">FUNCTION</option>
+			</param>
+		<when value="ASV">
+            <param format="rdata" name="data" type="data" label="Phyloseq object " help="This is the result of FROGSSTAT_Phyloseq_Import_Data without normalisation (DESeq2 is more powerful on unnormalised counts) (format RData)"/>
+		</when>
+		<when value="FUNCTION">
+            <param format="tsv" name="input_functions" type="data" label="Function abundances file " help="Input file of predicted function abundances (frogsfunc_functions_unstrat.tsv from FROGSFUNC_2_function tool)."/>
+            <param format="tabular,tsv" argument="--samplefile" type="data" label="Sample file (format: TSV)" help="The file must contain the metadata that characterise each sample."/>
+		</when>
+		</conditional>
+        <!-- Parameters -->
+	    <param name="var1" type="text" label="Experimental variable" help="The factor that could have an effect on ASV/FUNCTION abundances. Ex: Treatment, etc.">
             <expand macro="sanitizer_validator"/>
         </param>
 		<conditional name="multiple">
-            <param name="seconde" type="select" label="Do you want to correct for a confounding factor?" help="If yes, specifiy counfouding factor">
+            <param name="seconde" type="select" label="Do you want to correct a confounding factor?" help="If yes, specify the counfouding factor">
                 <option value="true">True</option>
                 <option value="false" selected="true">False</option>
             </param>
@@ -41,7 +62,15 @@
         </conditional>
     </inputs>
     <outputs>
-        <data format="rdata" name="dds" label="${tool.name}: dds.Rdata" from_work_dir="DESeq2_preprocess.Rdata"/>
+        <data format="rdata" name="dds_asv" label="${tool.name}: asv_dds.Rdata" from_work_dir="asv_dds.Rdata">
+            <filter> analysis_type['analysis'] == 'ASV'</filter>
+        </data>
+        <data format="rdata" name="dds_function" label="${tool.name}: function_dds.Rdata" from_work_dir="function_dds.Rdata">
+            <filter> analysis_type['analysis'] == 'FUNCTION'</filter>
+        </data>
+        <data format="rdata" name="phyloseq" label="${tool.name}: function_data.Rdata" from_work_dir="function_data.Rdata">
+            <filter> analysis_type['analysis'] == 'FUNCTION'</filter>
+        </data>
     </outputs>
     <tests>
         <test>
@@ -68,27 +97,41 @@

 Input

-**phyloseq object** (format rdata):
-A phyloseq object stored in a rdata file.
-This file is the result of FROGSSTAT Phyloseq Import Data.
+Two cases :
+
+-1. For **ASV**: a **phyloseq object** stored in an Rdata file. This file is the result of FROGSSTAT Phyloseq Import Data. We need unnormalised data to analyse the abundance differences.

 .. class:: warningmark

 We need unnormalised data to analyse the abundance differencies.

+
+-2. For **FUNCTION**: two files:
+		- the file that contains the predicted **function abundances**; it is named frogsfunc_functions_unstrat.tsv and is produced by the FROGSFUNC_2_function tool.
+		- the file that must contain the **metadata** that characterise each sample i.e. the conditions of experiment with sample ID in the first column as:
+
+  .. image:: FROGS_Phyloseq_samplefile.png
+     :height: 115
+     :width: 369
+
+
 .. class:: h3

 Ouput

-**dds object** (format rdata):
-A DESeq2 dataset (dds) stored in rdata file.
-This result will be one of the input of the FROGSSTAT DESeq Visualisation tool.
+Two cases :
+
+1.	For **ASV**: a **dds.object** that is a DESeq2 dataset (dds) stored in Rdata file. This result will be one of the input of the FROGSSTAT DESeq Visualisation tool.
+2.	For **FUNCTION**:
+		- a **dds.object** that is a DESeq2 dataset (dds) stored in Rdata file. This result will be one of the input of the FROGSSTAT DESeq Visualisation tool.
+		- a **function_data.Rdata** file that contains the data as one phyloseq object.
+

 .. class:: infomark page-header h2

 How it works ?

-The DESeq function performs a default analysis through the steps:
+The DESeq2 function performs a default analysis through the steps:

        1. estimation of size factors: ‘estimateSizeFactors’
--- a/deseq2_visualisation.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/deseq2_visualisation.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -1,5 +1,5 @@
-<tool id="FROGSSTAT_DESeq2_Visualisation" name="FROGSTAT Deseq2 Visualisation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-    <description>to extract and visualise differentially abundant OTUs</description>
+<tool id="FROGSSTAT_DESeq2_Visualisation" name="FROGSSTAT DESeq2 Visualisation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
+    <description>to extract and visualise differentially abundant ASVs or functions</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -9,24 +9,33 @@
     </expand>
     <command detect_errors="exit_code">
     deseq2_visualisation.py
-        --phyloseqData '$phyloseqData'
+        --analysis '$analysis'
+        --abundanceData '$abundanceData'
         --dds '$dds'
         --var '$var'
         #if $varType.vartType_selected == "qual":
             --mod1 '$varType.mod1'
             --mod2 '$varType.mod2'
         #end if
+        #if $analysis == "FUNCTION":
+            --ipath-over '$over'
+            --ipath-under '$under'
+        #end if
         --padj $padj
         --html '$html'
     </command>
     <inputs>
         <!-- Files -->
-        <param format="rdata" argument="--phyloseqData" type="data" label="Phyloseq object (format: RData)" help="This is the result of FROGS Phyloseq Import Data, used in FROGSSTAT DESeq2 Preprocess tool" />
-        <param format="rdata" argument="--dds" type="data" label="DESeq2 object (format: RData)" help="This is the result of FROGSSTAT DESeq2 Preprocess tool"/>
+        <param name="analysis" type="select" label="Type of analysis" help="Type of data to perform the differential analysis. ASV: DESeq2 is run on the ASV abundance table. FUNCTION: DESeq2 is run on predicted function abundance table from FROGSFUNC_2_function tool." display='radio'>
+            	<option value="ASV" selected="true">ASV</option>
+            	<option value="FUNCTION">FUNCTION</option>
+		</param>
+        <param format="rdata" argument="--abundanceData" type="data" label="Data object (format: data.RData)" help="For ASV: asv_data.Rdata from FROGSSTAT_Phyloseq_Import_Data tool - For FUNCTION: function_data.Rdata from FROGSSTAT_DESeq2_Preprocess tool." />
+        <param format="rdata" argument="--dds" type="data" label="DESeq2 object (format: dds.RData)" help="This is the result of FROGSSTAT_DESeq2_Preprocess tool asv_dds.Rdata or function_dds.Rdata"/>
         <!-- Parameters -->
-        <param argument="var" type="text" value="" label="Experimental variable" help="The suspected factor to have an effect on OTU abundances (one of the variables used in FROGS DESeq2 Preprocess tool). Ex : Treatment"/>
+        <param argument="var" type="text" value="" label="Experimental variable" help="The factor that could have an effect on ASV/FUNCTION abundances. Ex : Treatment"/>
         <conditional name="varType">
-            <param type="select" name="vartType_selected" label="Is your variable quantitative or qualitative?" help="If qualitative, choose 2 conditions to compare">
+            <param type="select" name="vartType_selected" label="The experimental variable is it quantitative or qualitative?" help="If qualitative, choose 2 conditions to compare">
                 <option value="qual">Qualitative</option>
                 <option value="quant">Quantitative</option>
             </param>
@@ -40,14 +49,20 @@
             </when>
             <when value="quant"/>
         </conditional>
-        <param argument="--padj" type="float" value="0.05" label="Adjusted p-value threshold" help="Threshold used for statistical significance of the differentially abundant OTU analysis"/>
+        <param argument="--padj" type="float" value="0.05" label="Adjusted p-value threshold" help="Threshold used for statistical significance of the differentially abundant ASV/FUNCTION analysis"/>
     </inputs>
     <outputs>
         <data format="html" name="html" label="${tool.name}: report.nb.html" from_work_dir="report.nb.html"/>
+        <data format="tsv" name="over" label="${tool.name}: ipath_over.tsv" from_work_dir="ipath_over.tsv">
+            <filter>analysis == "FUNCTION"</filter>
+        </data>
+        <data format="tsv" name="under" label="${tool.name}: ipath_under.tsv" from_work_dir="ipath_under.tsv">
+            <filter>analysis == "FUNCTION"</filter>
+        </data>
     </outputs>
     <tests>
         <test>
-            <param name="phyloseqData" value="references/16-phylo_import.Rdata"/>
+            <param name="abundanceData" value="references/16-phylo_import.Rdata"/>
             <param name="dds" value="references/23-deseq2_preprocess.Rdata"/>
             <param name="var" value="EnvType" />
             <conditional name="varType">
@@ -90,27 +105,36 @@

 Input

-**phyloseq object** (format rdata):
-One phyloseq object stored in a rdata file.
-This file is the result of FROGSSTAT Phyloseq Import Data.
+**-Data object-** (format data.Rdata):
+One *phyloseq* object stored in a Rdata file.
+This file is the result of :

-**dds object** (format rdata):
-A DESeq2 dataset (dds) stored in rdata file.
-This file is the result of FROGSSTAT DESeq2 preprocess.
+	1.	for **ASV**: asv.dds.Rdata from FROGSSTAT_Phyloseq_Import_Data tool
+	2.	for **FUNCTION**: DESeq2_preprocess_tool output for FUNCTION parameter.
+
+**-DESeq2 object-** (format dds.Rdata):
+A DESeq2 dataset (dds) stored in Rdata file.
+This file is the result of FROGSSTAT_DESeq2_preprocess tool.

 .. class:: h3

 Ouput

-**html file** (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_): visualisation of "Differential Abundance".
+**-2 TSV files-**
+- ipath_under.tsv
+- ipath_over.tsv
+To visualise and explore metabolic pathways with `IPATH3 website  &lt;https://pathways.embl.de/&gt;`_ , use the two files **ipath_under.tsv** and **ipath_over.tsv** as inputs.
+
+
+**-html file-** (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_): visualisation of *Differential Abundance* of ASV or FUNCTIONS depending on the case.

 The html file contains Table, Pie Chart, MA plot, Volcano plot and Heatmap plot. If the experimental variable is qualitative, only samples corresponding to the 2 compared conditions are shown in the Heatmap. Otherwise, samples are sorted in increasing order of the experimental variable.

-* Table containing the differentially abundant OTUs.
+* Table containing the differentially abundant ASVs.

  .. image:: FROGS_DESeq2_html_table.png
-    :height: 216
-    :width: 789
+    :height: 448
+    :width: 641

 * Pie Chart, MA plot and Volcano plot

@@ -126,17 +150,23 @@
     :height: 506
     :width: 633

-* Heatmap plot corresponding to the differentially abundant OTUs.
+* Heatmap plot corresponding to the differentially abundant ASVs.

  .. image:: FROGS_DESeq2_html_heatmap_plot.png
     :height: 576
     :width: 768

+* Differentially abundant functions visualized with iPath 3 (FUNCTION analysis only).
+
+ .. image:: FROGS_DESeq2_html_ipath.png
+    :height: 720
+    :width: 584
+
 .. class:: infomark page-header h2

 How it works ?

-Based on the variable you precise to construct the model in FROGSSTAT DESeq2 Preprocess, this tool will construct table and graphs to visualise differentially abundant OTU between condition of the selected variables.
+Based on the variable you specified to construct the model in FROGSSTAT DESeq2 Preprocess, this tool will construct a table and graphs to visualise differentially abundant ASVs or functions between conditions of the selected variable.

 You may first precise the variable used to construct the model during the FROGSSTAT DESeq Preprocess step. If you precised variable with a confounding factor (a second variable), you may choose between one of the variables, but remember that you will see the result of this variable corrected by the confounding factor (and reversely) not just the selected variable itself.
--- a/frogsfunc_functions.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/frogsfunc_functions.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -15,14 +15,15 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -->
-<tool id="FROGSFUNC_step3_functions" name="FROGSFUNC_step3_functions" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="FROGSFUNC_step3_functions" name="FROGSFUNC_2_functions" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
     <description>Calculates functions abundances in each sample.</description>

-    <macros>
+  <macros>
         <import>macros.xml</import>
-    </macros>
+  </macros>

-    <expand macro="requirements_frogsfunc" />
+  <expand macro="requirements_frogsfunc" />
+

     <stdio>
         <exit_code range="1:" />
@@ -30,33 +31,112 @@
     </stdio>
     <command >
        frogsfunc_functions.py
+            @CPUS@
             --input-biom $input_biom
             --input-fasta $input_fasta
-            --input-function $function
-            --input-marker $marker
+            --input-tree $input_tree
+            --input-marker $input_marker
+            --marker-type $category.value
+            #if $category.value == "16S"
+                --functions $functions
+            #end if
+            #if $category.value != "16S"
+                --input-function-table $functions.fields.traits
+            #end if
             --max-nsti $max_nsti
-            --output-function-abund $function_abund
-            --seqtab $seqtab
-            --weighted $weighted
-            --excluded $excluded
+            --min-blast-ident $min_blast_ident
+            --min-blast-cov $min_blast_cov
+	   		--hsp-method $hsp_method
+            --output-biom $output_biom
+            --output-fasta $output_fasta
+            --output-function-abund "frogsfunc_functions_unstrat.tsv"
+            --output-otu-norm $output_otu_norm
+            --output-weighted $output_weighted
+            --output-excluded $output_excluded
             --summary $summary_file
+
     </command>
     <inputs>
         <!-- Input files -->
-        <param argument="--input-biom" format="biom1" name="input_biom" type="data" label="Biom file" help="The abundance file i.e. FROGSFUNC_step1_placeseqs tool output file (frogsfunc_placeseqs.biom)." optional="false"/>
-       	<param argument="--input-fasta" format="fasta" name="input_fasta" type="data" label="Sequence file" help="The fasta file i.e. from FROGSFUNC_step1_placeseqs tool output file (frogsfunc_placeseqs.fasta)." optional="false"/>
-        <param argument='--input-function' format="tsv" type="data" label="Function file" help="Copy number table of functions present in the predicted genome for each OTU i.e. FROGSFUNC_step2_copynumbers tool output file (frogsfunc_copynumbers_predicted_functions.tsv)." optional="false"/>
-        <param argument='--input-marker' format="tsv" type="data" label="Marker file" help="Table of predicted marker copy number i.e. FROGSFUNC_step2_copynumbers output (frogsfunc_copynumbers_marker.tsv)." optional="false"/>
+        <param argument="--input-biom" format="biom1" type="data" label="Biom file" help="The abundance file i.e. FROGSFUNC_1_placeseqs_copynumber tool output file (frogsfunc_placeseqs.biom)." optional="false"/>
+       	<param argument="--input-fasta" format="fasta" type="data" label="Sequence file" help="The fasta file i.e. from FROGSFUNC_1_placeseqs_copynumber tool output file (frogsfunc_placeseqs.fasta)." optional="false"/>
+        <param argument="--input-tree" format="nhx" type="data" label="Tree file" help="The file contains the tree information from FROGSFUNC_1_placeseqs_copynumber tool (frogsfunc_placeseqs_tree.nwk)." optional="false"/>
+        <param argument="--input-marker" format="tsv" type="data" label="Marker file" help="Table of predicted marker copy number i.e. FROGSFUNC_1_placeseqs_copynumber output (frogsfunc_marker.tsv)." optional="false"/>

         <!-- Parameters-->
-        <param argument="--max-nsti" name="max_nsti" type="float" label="NSTI cut-off" help="Any sequence with an NSTI above this threshold will be out. (default: 2)" value="2" min="0" optional="false" />
+	    <param name="category" type="select" label="Taxonomic marker" help="Taxonomic marker of interest." multiple="false" display="radio">
+            <options from_data_table="frogs_picrust2_marker_table">
+                <column name='name' index='0' />
+                <column name='value' index='0' />
+                <filter type="unique_value" column='0'/>
+                    <validator type="no_options" message="A built-in database is not available" />
+            </options>
+		</param>
+		<param argument="--functions" type="select" label="Target function database" multiple="true" optional="false" help=" 16S : at least 'EC' or/and 'KO' should be chosen (EC for Metacyc pathway analysis or/and KO for KEGG pathway analysis) - others values are optionnal. ITS and 18S : 'EC' only available." >
+			<options from_data_table="frogs_picrust2_marker_table">
+				<column name='name' index='1' />
+				<column name='value' index='1' />
+				<column name='path' index='2' />
+				<column name='traits' index='3' />
+                <filter type="param_value" ref="category" column="0" />
+ 		<validator type="expression" message="'EC' is the default database used by PICRUSt2. 'EC' or 'KO' must be at least selected. Other tables are optionnal">"EC" in value or "KO" in value</validator>
+            </options>
+        </param>
+        <param argument="--max-nsti" type="float" label="NSTI cut-off" help="Any sequence with an NSTI above this threshold will be out. (default: 2)" value="2" min="0" optional="false" />
+        <param argument="--min-blast-ident" type="float" label="Identity alignment cut-off" help="Percentage identity of the alignment between the input sequence and the PICRUSt2 reference sequence. Below this threshold, all sequences will be discarded. (default: None)" value="0" min="0" max="1" optional="true" />
+        <param argument="--min-blast-cov" type="float" label="Coverage alignment cut-off" help="Coverage identity of the alignment between the input sequence and the PICRUSt2 reference sequence. Below this threshold, all sequences will be discarded.  (default: None)" value="0" min="0" max="1" optional="true" />
+		<param argument="--hsp-method" type="select" label="HSP method" help="Hidden-state prediction method to use: maximum parsimony (mp), empirical probabilities (emp_prob), continuous traits prediction using subtree averaging (subtree_average), continuous traits prediction with phylogentic independent contrast (pic), continuous traits reconstruction using squared-change parsimony (scp) (default: mp)." multiple="false" display="radio">
+            <option value="mp">mp</option>
+            <option value="emp_prob">emp_prob</option>
+            <option value="pic">pic</option>
+            <option value="scp">scp</option>
+            <option value="subtree_average">subtree_average</option>
+		</param>
     </inputs>
     <outputs>
         <data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="report.html"/>
-        <data format="tsv" name="seqtab" label="${tool.name}: frogsfunc_functions_marker_norm.tsv" from_work_dir="frogsfunc_functions_marker_norm.tsv.tsv"/>
-        <data format="tsv" name="weighted" label="${tool.name}: frogsfunc_functions_weighted_nsti.tsv" from_work_dir="frogsfunc_functions_weighted_nsti.tsv"/>
-        <data format="tsv" name="excluded" label="${tool.name}: frogsfunc_functions_excluded.tsv" from_work_dir="frogsfunc_functions_excluded.tsv"/>
-        <data format="tsv" name="function_abund" label="${tool.name}:   frogsfunc_functions_unstrat.tsv" from_work_dir=" frogsfunc_functions_unstrat.tsv"/>
+		<data format="biom1" name="output_biom" label="${tool.name}: frogsfunc_functions.biom" from_work_dir="frogsfunc_functions.biom"/>
+		<data format="fasta" name="output_fasta" label="${tool.name}: frogsfunc_functions.fasta" from_work_dir="frogsfunc_functions.fasta"/>
+
+        <data format="tsv" name="output_otu_norm" label="${tool.name}: frogsfunc_functions_marker_norm.tsv" from_work_dir="frogsfunc_functions_marker_norm.tsv.tsv"/>
+        <data format="tsv" name="output_weighted" label="${tool.name}: frogsfunc_functions_weighted_nsti.tsv" from_work_dir="frogsfunc_functions_weighted_nsti.tsv"/>
+        <data format="tsv" name="output_excluded" label="${tool.name}: frogsfunc_functions_excluded.tsv" from_work_dir="frogsfunc_functions_excluded.tsv"/>
+        <data format="tsv" name="output_copy_ec_abund" label="${tool.name}: EC_copynumbers_predicted.tsv" from_work_dir="EC_copynumbers_predicted.tsv">
+            <filter>"EC" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_ko_abund" label="${tool.name}: KO_copynumbers_predicted.tsv" from_work_dir="KO_copynumbers_predicted.tsv">
+            <filter>"KO" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_cog_abund" label="${tool.name}: COG_copynumbers_predicted.tsv" from_work_dir="COG_copynumbers_predicted.tsv">
+            <filter>"COG" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_pfam_abund" label="${tool.name}: PFAM_copynumbers_predicted.tsv" from_work_dir="PFAM_copynumbers_predicted.tsv">
+            <filter>"PFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_tigrfam_abund" label="${tool.name}: TIGRFAM_copynumbers_predicted.tsv" from_work_dir="TIGRFAM_copynumbers_predicted.tsv">
+            <filter>"TIGRFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_pheno_abund" label="${tool.name}: PHENO_copynumbers_predicted.tsv" from_work_dir="PHENO_copynumbers_predicted.tsv">
+            <filter>"PHENO" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_ec_abund" label="${tool.name}:  frogsfunc_functions_unstrat_EC.tsv" from_work_dir="frogsfunc_functions_unstrat_EC.tsv">
+            <filter>"EC" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_ko_abund" label="${tool.name}:  frogsfunc_functions_unstrat_KO.tsv" from_work_dir="frogsfunc_functions_unstrat_KO.tsv">
+            <filter>"KO" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_cog_abund" label="${tool.name}:  frogsfunc_functions_unstrat_COG.tsv" from_work_dir="frogsfunc_functions_unstrat_COG.tsv">
+            <filter>"COG" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_pfam_abund" label="${tool.name}:  frogsfunc_functions_unstrat_PFAM.tsv" from_work_dir="frogsfunc_functions_unstrat_PFAM.tsv">
+            <filter>"PFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_tigrfam_abund" label="${tool.name}:  frogsfunc_functions_unstrat_TIGRFAM.tsv" from_work_dir="frogsfunc_functions_unstrat_TIGRFAM.tsv">
+            <filter>"TIGRFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_pheno_abund" label="${tool.name}:  frogsfunc_functions_unstrat_PHENO.tsv" from_work_dir="frogsfunc_functions_unstrat_PHENO.tsv">
+            <filter>"PHENO" in functions</filter>
+        </data>
     </outputs>


@@ -70,11 +150,11 @@
             <param name="min_samples" value="1" />
             <param name="strat" value="false" />

-            <output name="function_abund" file="references/27-frogsfunc_functions_unstrat.tsv" compare="diff" lines_diff="0" />
-            <output name="seqtab" file="references/27-frogsfunc_functions_marker_norm.tsv" compare="diff" lines_diff="0" />
-            <output name="weighted" file="references/27-frogsfunc_functions_weighted_nsti.tsv" compare="diff" lines_diff="0" />
+            <output name="output_function_abund" file="references/27-frogsfunc_functions_unstrat.tsv" compare="diff" lines_diff="0" />
+            <output name="output_otu_norm" file="references/27-frogsfunc_functions_marker_norm.tsv" compare="diff" lines_diff="0" />
+            <output name="output_weighted" file="references/27-frogsfunc_functions_weighted_nsti.tsv" compare="diff" lines_diff="0" />
             <output name="summary_file" file="references/27-frogsfunc_functions_report.html" compare="diff" lines_diff="0" />
-            <output name="excluded" file="references/27-frogsfunc_functions_excluded.txt" compare="diff" lines_diff="0" />
+            <output name="output_excluded" file="references/27-frogsfunc_functions_excluded.txt" compare="diff" lines_diff="0" />
         </test>
     </tests>

@@ -86,13 +166,18 @@

 What it does

-Predicting of functions weighted by the relative abundance of OTUs in the community. Inferring the metagenomes of the communities with `PICRUSt2 &lt;https://github.com/picrust/picrust2&gt;`_.
-There are two steps performed at this stage:
+FROGSFUNC_2_functions is the second step of PICRUSt2. It predicts:
+	(i) Functional abundances based solely on the sequences of marker genes with PICRUSt2. The available marker genes are 16S, ITS and 18S.
+
+	(ii) Functions, weighted by the relative abundance of ASVs in the community. Inferring the metagenomes of the communities with `PICRUSt2 &lt;https://github.com/picrust/picrust2&gt;`_.
+

-    (i) The read depth per OTU is divided by the predicted marker (16S/ITS/18S) copy numbers. This is performed to help control for variation in marker copy numbers across organisms, which can result in interpretation issues.
-        For instance, imagine an organism with five identical copies of the 16S gene that is at the same absolute abundance as an organism with one 16S gene. The OTU corresponding to the first organism would erroneously be inferred to be at higher relative abundance simply because this organism had more copies of the 16S gene.
-
-    (ii) The OTU read depths per sample (after normalizing by marker (16S/ITS/18S) copy number) are multiplied by the predicted function copy numbers per OTU.
+There are three steps performed at this stage:
+	(i) It runs hidden-state prediction (hsp) to predict function abundances with castor-R of each ASVs placed in the PICRUSt2 reference phylogenetic tree (FROGSFUNC_1_placeseqs_copynumber outputs).
+
+    (ii) The read depth per ASV is divided by the predicted marker (16S/ITS/18S) copy numbers. This is performed to help control for variation in marker copy numbers across organisms, which can result in interpretation issues. For instance, imagine an organism with five identical copies of the 16S gene that is at the same absolute abundance as an organism with one 16S gene. The ASV corresponding to the first organism would erroneously be inferred to be at higher relative abundance simply because this organism had more copies of the 16S gene.
+
+    (iii) The ASV read depths per sample (after normalizing by marker (16S/ITS/18S) copy number) are multiplied by the predicted function copy numbers per ASV.


 .. class:: infomark page-header h2
@@ -105,65 +190,132 @@
 Inputs


-**Biom file**:
+**-Biom file-**:
+
+The ASVs biom file from FROGSFUNC_1_placeseqs_copynumber tool (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_). (FROGSFUNC_1_placeseqs_copynumber.biom from FROGSFUNC_1_placeseqs_copynumber)
+
+**-Sequence file-**:

- The OTUs biom file from FROGSFUNC_step1_placeseqs tool (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_). (frogsfunc_placeseqs.biom)
+The sequence file of ASVs inserted into the PICRUSt2 reference tree (frogsfunc_placeseqs.fasta from FROGSFUNC_1_placeseqs_copynumber step).
+
+**-Tree file (format newick nwk)-**:
+
+The file contains the tree information from the FROGSFUNC_1_placeseqs_copynumber step (FROGSFUNC_1_placeseqs_copynumber output : FROGSFUNC_1_placeseqs_copynumber_tree.nwk)

-**Function file**:
+**-Marker file-**:
+
+Output table of predicted marker gene copy numbers per sequence. (frogsfunc_marker.tsv from FROGSFUNC_1_placeseqs_copynumber step)
+
+.. class:: h3

- The table of predicted function abundance from FROGSFUNC_step2_copynumbers tool. (frogsfunc_copynumbers_predicted_functions.tsv)
+Parameters
+
+
+**-Taxonomic marker-**:
+
+Marker gene to be analyzed from the previous FROGSFUNC_1_placeseqs_copynumber step (frogsfunc_marker.tsv from FROGSFUNC_1_placeseqs_copynumber).
+
+**-Target function database-**:

-**Marker file**:
+Which default pre-calculated count table to use ?
+ - For the 16S rRNA gene you can choose between: 'EC', 'KO', 'PFAM', 'COG', 'TIGRFAM', and/or 'PHENO'. You must select at least 'EC' or 'KO' because the next FROGSFUNC tools require the information from Metacyc (EC) or KEGG (KO).
+ - For ITS and 18S markers, only 'EC' is available.
+
+For more information about the different databases:

- Output table of predicted marker gene copy numbers per sequence from FROGSFUNC_step2_copynumbers tool. (frogsfunc_copynumbers_marker.tsv)
+ - EC : https://enzyme.expasy.org/
+ - KO : https://www.genome.jp/kegg/ko.html
+ - PFAM : http://pfam.xfam.org/
+ - COG : https://www.ncbi.nlm.nih.gov/research/cog-project/
+ - TIGRFAM : https://tigrfams.jcvi.org/cgi-bin/index.cgi
+ - PHENO : https://phenodb.org/
+
+**-NSTI cut-off-**:

-**NSTI cut-off**:
+ Nearest Sequenced Taxon Index (`NSTI &lt;https://www.nature.com/articles/nbt.2676&gt;`_) is the phylogenetic distance between the ASV and the nearest sequenced reference genome. This metric can be used to identify ASVs that are highly distant from all reference sequences (the predictions for these sequences are less reliable!). The higher the NSTI score, the less the affiliations are relevant. Any ASVs with a NSTI value higher than 2 are typically either from uncharacterized phyla or off-target sequences.
+
+**-Identity alignment cut-off-**:
+
+ All sequences whose alignment identity percentage against the closest PICRUSt2 reference sequence is lower than this value will be excluded (between 0 and 1).

- Nearest Sequenced Taxon Index (`NSTI &lt;https://www.nature.com/articles/nbt.2676&gt;`_) is the phylogenetic distance between the OTU and the nearest sequenced reference genome. This metric can be used to identify OTUs that are highly distant from all reference sequences (the predictions for these sequences are less reliable!). The higher the NSTI score, the less the affiliations are relevant. Any OTUs with a NSTI value higher than 2 are typically either from uncharacterized phyla or off-target sequences.
+**-Coverage alignment cut-off-**:
+
+ All sequences whose alignment coverage percentage against the closest PICRUSt2 reference sequence is lower than this value will be excluded (between 0 and 1).
+
+**-HSP method-**:
+
+ Hidden-state prediction method to use.
+

-
 .. class:: h3

 Outputs

-**Report file**: (report.html)
+**-Fasta file-**:
+
+ Sequence file without excluded ASVs (NSTI, blast perc identity or blast perc coverage thresholds). (FROGSFUNC_2_functions.fasta)
+
+**-ASV abundance Biom file - one per chosen target function database (EC, KO, PFAM, COG, TIGRFAM,PHENO)-**:
+
+ ASV abundance data in a biom file without excluded ASVs (NSTI, %identity or %coverage thresholds alignment). (FROGSFUNC_2_functions.biom)
+
+**-Function abundance file-**:
+
+ These are the function abundance predictions of the metagenome, per sample. (frogsfunc_functions_unstrat_DATABASENAME.tsv, for example: frogsfunc_functions_unstrat_EC.tsv)
+
+Table column description:
+ - classification: the hierarchy classification of the gene function.
+ - db_link: the url on the link accession ID (*observation_name*) of the function.
+ - observation_name: Accession identifier
+ - observation_sum: Total abundance of functions across all samples.
+ - last columns: Abundances of these functions in each samples.
+
+**-ASV normalized abundance table-**:
+
+ Table with normalized abundances per marker copy number from FROGSFUNC_1 step. (FROGSFUNC_2_functions_marker_norm.tsv)
+
+**-Weighted NSTI file-**:
+
+ Output file with the mean of NSTI value per sample. (FROGSFUNC_2_functions_weighted_nsti.tsv)
+
+**-Excluded sequences file-**:
+
+Information about removed sequences that have a NSTI value above the NSTI threshold chosen in this step:
+ - ASV: ASV name id.
+ - FROGS_taxonomy
+ - PICRUSt2_taxonomy
+ - exclusion_paramater: The parameter(s) that excluded the ASVs.
+ - value_parameter: The values associated with the parameter(s).
+
+**-Copy number marker file - one per chosen target function database (EC, KO, PFAM, COG, TIGRFAM,PHENO)-**:
+
+Output table of predicted function copy numbers per ASV. There are as many tables as chosen target function databases (EC, KO, PFAM, COG, TIGRFAM, PHENO) (example: FROGSFUNC_2_functions: EC_copynumbers_predicted.tsv and FROGSFUNC_2_functions: PHENO_copynumbers_predicted.tsv)
+
+**-Report file-**: (report.html)

 .. image:: FROGS_frogsfunc_functions_piechart.png
-    :height: 500
-    :width: 1352
+    :height: 375
+    :width: 1014
+
+ASVs are excluded if the associated NSTI is above the threshold, or if the alignment values are below the thresholds.
+

-OTUs are out if the NSTI associated is above the threshold.
+.. image:: FROGS_frogsfunc_functions_starplot.png
+    :height: 466
+    :width: 806
+
+Number of different taxonomic ranks before (green) and after (orange) application of the filters.
+

 .. image:: FROGS_frogsfunc_functions_table.png
     :height: 580
-    :width: 1352
+    :width: 1452


 .. image:: FROGS_frogsfunc_functions_sunburst.png


-Gene families/function from KEGG or Metacyc databases are classified according to 3 hierarchy levels. The graph shows the proportion of each level within the selected samples.
-
-**Function abundance file**:
-
- It is the function abundance predictions of metagenome, per sample. (frogsfunc_functions_unstrat.tsv)
-
- - Classification column: the hierarchy classification of the gene function.
- - db_link column: the url on the link accession ID (*observation_name*) of the function.
- - observation_name: Accession identifier
- - last columns: Abundances of these functions in each samples.
-
-**Excluded sequences**:
-
- Information (FROGS taxonomy, PICRUSt2 taxonomy, NSTI) about removed sequences that have a NSTI value aboved the NSTI threshold chosen in this step.
-
-**Normalized OTU abundance table**:
-
- Table with normalized abundance per marker copy number. (frogsfunc_functions_marker_norm.tsv)
-
-**Weighted NSTI file**:
-
- It is the table with average NSTI calculated per sample. (frogsfunc_functions_weighted_nsti.tsv)
+Gene families/function from KEGG or Metacyc databases are classified according to 4 hierarchy levels. The graph shows the proportion of each level within the selected samples.
--- a/frogsfunc_pathways.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/frogsfunc_pathways.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -15,7 +15,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -->
-<tool id="FROGSFUNC_step4_pathways" name="FROGSFUNC_step4_pathways" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="FROGSFUNC_step4_pathways" name="FROGSFUNC_3_pathways" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
 	<description>Calculates pathway abundances in each sample. </description>

   <macros>
@@ -35,17 +35,11 @@
             --normalisation
           #end if
 	    --map $map_file.value
-
-          #if $category.value == "16S"
-            #if $map_file.fields.name == "Kegg"
-             --no-regroup
-            #end if
-          #end if
 	    --summary $summary_file
 	</command>
 	<inputs>
         <!-- Input files -->
-        <param argument="--input-file" format="tsv" name="input_file" type="data" label="Function abundance file" help="TSV function abundances table from FROGSFUNC_step3_function tool, frogsfunc_functions_unstrat.tsv (unstratified table)." optional="false"/>
+        <param argument="--input-file" format="tsv" type="data" label="Function abundance file" help="TSV function abundances table from FROGSFUNC_2_functions tool, FROGSFUNC_2_functions_unstrat_EC.tsv for Metacyc database or FROGSFUNC_2_functions_unstrat_KO.tsv for Kegg database (unstratified table)." optional="false"/>

         <!-- References -->
 	      <param name="category" type="select" label="Taxonomic marker" help="Taxonomic marker of interest." multiple="false" display="radio">
@@ -56,7 +50,7 @@
               <validator type="no_options" message="A built-in database is not available" />
           </options>
 			  </param>
-        <param name="map_file" type='select' label="Pathway reference" help="For 16S marker, choose Metacyc or KEGG in accordance with your choice in the FROGSFUNC_step2_copynumbers tool. For ITS or 18S marker, Metacyc is the only valid option." optional="false" multiple='false' display='radio'>
+        <param name="map_file" type='select' label="Pathway reference" help="For 16S marker, choose Metacyc or KEGG in accordance with your choice in the FROGSFUNC_1_placeseqs_copynumbers tool. For ITS or 18S marker, Metacyc is the only valid option." optional="false" multiple='false' display='radio'>
           <options from_data_table="frogs_picrust2_pathway_map">
             <column name='name' index='1'/>
             <column name='value' index='3'/>
@@ -70,7 +64,7 @@
         </inputs>
 	<outputs>
 		<data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="report.html"/>
-		 <data format="tsv" name="abund" label="${tool.name}: frogsfunc_pathways_unstrat.tsv" from_work_dir="frogsfunc_pathways_unstrat.tsv"/>
+		 <data format="tsv" name="abund" label="${tool.name}: frogsfunc_pathways_unstrat.tsv" from_work_dir="frogsfunc_pathways_unstrat.tsv"/>
 	</outputs>

   <tests>
@@ -91,10 +85,10 @@

 What it does

-FROGSFUNC_step4_pathway is the last step of `PICRUSt2 &lt;https://github.com/picrust/picrust2&gt;`_. This script infers MetaCyc/KEGG pathway abundances based on EC/KO number abundances.
+**FROGSFUNC_3_pathways** is the last step of `PICRUSt2 &lt;https://github.com/picrust/picrust2&gt;`_. This script infers MetaCyc/KEGG pathway abundances based on **EC** or **KO** number abundances.

-    - Regroups EC/KO numbers to MetaCyc/KEGG reactions.
-    - Infers which MetaCyc/KEGG pathways are present based on these reactions with `MinPath &lt;http://omics.informatics.indiana.edu/MinPath/&gt;`_.
+    - Regroups EC or KO numbers to MetaCyc or KEGG reactions, depending on the unstratified abundance input file.
+    - Infers which MetaCyc or KEGG pathways are present based on these reactions with `MinPath &lt;http://omics.informatics.indiana.edu/MinPath/&gt;`_.
     - Calculates and returns the abundance of pathways identified as present.

 .. class:: infomark page-header h2
@@ -105,23 +99,23 @@

 Input

-**Function prediction abundance file**:
+**-Function prediction abundance file (EC or KO)-**:

- TSV function abundances table from FROGSFUNC_step3_function tool, frogsfunc_functions_unstrat.tsv (unstratified table).
+ TSV function abundances table from FROGSFUNC_2_functions tool, frogsfunc_functions_unstrat_EC.tsv or frogsfunc_functions_unstrat_KO.tsv (unstratified table).

-**Taxonomic marker:**
+**-Taxonomic marker-**:

- Output table of predicted marker gene copy numbers per sequence from FROGSFUNC_step2_copynumbers tool. (frogsfunc_copynumbers_marker.tsv)
+ Output table of predicted marker gene copy numbers per sequence from FROGSFUNC_1_placeseqs tool. (frogsfunc_marker.tsv)

-**Pathway reference:**
+**-Pathway reference-**:

  Mapping of pathways to reactions.

- - For 16S marker, choose Metacyc or KEGG in accordance with your choice in the FROGSFUNC_step2_copynumbers tool. If you want both, run this tool twice.
+ - For 16S marker, choose Metacyc or KEGG in accordance with your choice in the FROGSFUNC_2_functions tool. If you want both, run this tool twice.
  - For ITS or 18S marker, Metacyc is the only valid option.


-**Do you want to normalize the final output table ?**:
+**-Do you want to normalize the final output table ?-**:

  If this option is set, the pathway abundances file (frogsfunc_functions_unstrat.tsv) is normalized: values are divided by sum of columns, then multiplied by 10^6 (Count Per Million values).

@@ -133,16 +127,14 @@

 Outputs

-**HTML report**:
+**-HTML report-**:

  The HTML file summarizes information about pathway abundances within each sample.

 .. image:: FROGS_frogsfunc_pathways_sunburst.png


-
-
-**Pathways abundances tables - unstratified**:
+**-Pathways abundances tables - unstratified-**:

  It is the pathways abundance predictions of metagenome, per sample. (frogsfunc_pathways_unstrat.tsv)
--- a/frogsfunc_placeseqs.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/frogsfunc_placeseqs.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -15,16 +15,14 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -->
-<tool id="FROGSFUNC_step1_placeseqs" name="FROGSFUNC_step1_placeseqs" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
-	<description>Places the OTUs into a reference phylogenetic tree.</description>
+<tool id="FROGSFUNC_step1_placeseqs" name="FROGSFUNC_1_placeseqs_and_copynumbers" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+	<description>Places ASVs into a reference phylogenetic tree.</description>

-    <macros>
+  <macros>
         <import>macros.xml</import>
-    </macros>
+  </macros>

-    <expand macro="requirements_frogsfunc">
-        <requirement type="package" version="3.1.1">ete3</requirement>
-    </expand>
+  <expand macro="requirements_frogsfunc" />

     <stdio>
         <exit_code range="1:" />
@@ -41,30 +39,37 @@
 	   		--output-biom $output_biom
 	   		--output-fasta $output_fasta
 	   		--closests-ref $closests_ref
+			--output-marker $output_marker
 	   		--summary $summary_file

 	   		#if $marker.name != "16S"
 	   		--ref-dir ${marker.value}
+			--input-marker-table ${marker.fields.traits}
 	   		#end if
 	</command>
 	<inputs>
     	<!-- Input files -->
-		<param argument="--input-fasta" format="fasta" name="input_fasta" type="data" label="Sequence file" help="The sequence file to analyse (format: fasta)." optional="false"/>
-		<param argument="--input-biom" format="biom1" name="input_biom" type="data" label="Biom file" help="The abundance file to analyse (format: biom). Taxonomic affiliations must be inside (FROGS Affiliation OTU step)." optional="false"/>
+		<param argument="--input-fasta" format="fasta" type="data" label="Sequence file" help="The sequence file to analyse (format: fasta)." optional="false"/>
+		<param argument="--input-biom" format="biom1" type="data" label="Biom file" help="The abundance file to analyse (format: biom). Taxonomic affiliations must be inside (FROGS taxonomic_affiliation step)." optional="false"/>

     	<!-- Parameters -->
         <param argument="--ref-dir" name="marker" type="select" label="Taxonomy marker" display="radio" help="Taxonomic marker of interest.">
 			<options from_data_table="frogs_picrust2_default_dir">
 	     		<column name="name" index="1"/>
 	        	<column name="value" index="2"/>
+				<column name="traits" index="3"/>
+                <filter type="unique_value" column='1'/>
 					<validator type="no_options" message="A built-in database is not available" />
 			</options>
 		</param>
-        <param argument="--placement-tool" name="placement_tool" type="select" label="Placement tool" help="Placement tool for insertion of sequences into the reference tree. SEPP is a low-memory alternative to EPA-ng for placing sequences." multiple="false" display="radio">
-        	<option value="epa-ng">epa-ng</option>
-        	<option value="sepp">sepp</option>
+        <param argument="--placement-tool" type="select" label="Placement tool" help="Placement tool for insertion of sequences into the reference tree. SEPP is a low-memory alternative to EPA-ng for placing sequences, and is only available for 16S analysis." multiple="false" display="radio">
+			<options from_data_table="frogs_picrust2_default_dir">
+				<column name='name' index='4' />
+				<column name='value' index='4' />
+                <filter type="param_value" ref="marker" column="2" />
+			</options>
 		</param>
-		<param argument="--min-align" name="min_align" type="float" label="Minimum alignment length" help="Proportion of the total length of an input sequence that must align with reference sequences. All other will be out. (default: 0.80)" value="0.8" min="0" max="1" optional="false" />
+		<param argument="--min-align" type="float" label="Minimum alignment length" help="Proportion of the total length of an input sequence that must align with reference sequences. All others will be out. (default: 0.80)" value="0.8" min="0" max="1" optional="false" />
 	</inputs>

 	<outputs>
@@ -73,7 +78,8 @@
 		<data format="tsv" name="excluded" label="${tool.name}: frogsfunc_placeseqs_excluded.tsv" from_work_dir="frogsfunc_placeseqs_excluded.tsv"/>
 		<data format="fasta" name="output_fasta" label="${tool.name}: frogsfunc_placeseqs.fasta" from_work_dir="frogsfunc_placeseqs.fasta"/>
 		<data format="tsv" name="closests_ref" label="${tool.name}: frogsfunc_placeseqs_closests_ref_sequences.txt" from_work_dir="frogsfunc_placeseqs_closests_ref_sequences.txt"/>
-		<data format="biom1" name="output_biom" label="${tool.name}: frogsfunc_placeseqs.biom" from_work_dir="frogsfunc_placeseqs.biom"/>
+		<data format="biom1" name="output_biom" label="${tool.name}: frogsfunc_placeseqs.biom" from_work_dir="frogsfunc_placeseqs.biom"/>
+		<data format="tsv" name="output_marker" label="${tool.name}: frogsfunc_marker.tsv" from_work_dir="frogsfunc_marker.tsv"/>
 	</outputs>

 	<tests>
@@ -90,6 +96,7 @@
 			<output name="output_tree" file="references/25-frogsfunc_placeseqs_tree.nwk" compare="diff" lines_diff="0" />
 			<output name="closests_ref" file="references/25-frogsfunc_placeseqs_closests_ref_sequences.txt" compare="diff" lines_diff="0" />
 			<output name="summary_file" file="references/25-frogsfunc_placeseqs_report.html" compare="diff" lines_diff="0" />
+			<output name="output_marker" file="references/26-frogsfunc_marker.tsv" compare="diff" lines_diff="0" />
 		</test>
 	</tests>

@@ -101,13 +108,16 @@

 What it does

-FROGSFUNC_step1_placeseqs is the first step of PICRUSt2. It inserts your study sequences into a reference tree (`details &lt;https://github.com/picrust/picrust2/wiki/Sequence-placement&gt;`_). By default, this reference tree is based on 20,000 16S sequences from genomes in the `Integrated Microbial Genomes database &lt;https://img.jgi.doe.gov/&gt;`_. The script performs this step, which specifically:
+**FROGSFUNC_1_placeseqs_and_copynumbers** is the first step of PICRUSt2. It inserts your study sequences into a reference tree (`details &lt;https://github.com/picrust/picrust2/wiki/Sequence-placement&gt;`_). By default, this reference tree is based on 20,000 16S sequences from genomes in the `Integrated Microbial Genomes database &lt;https://img.jgi.doe.gov/&gt;`_. The script performs this step, which specifically:

     - Aligns your study sequences with a multiple-sequence alignment of reference 16S, ITS or 18S sequences with `HMMER &lt;http://hmmer.org&gt;`_.

     - Finds the most likely placements of your study sequences in the reference tree with `EPA_NG &lt;https://github.com/Pbdas/epa-ng#build-instructions&gt;`_ or `SEPP &lt;https://github.com/smirarab/sepp&gt;`_.

     - Produces a treefile with the most likely placement for each sequence as the new tips with `GAPPA &lt;https://github.com/lczech/gappa&gt;`_.
+
+    - Predicts marker copy number based solely on the sequences of marker genes with PICRUSt2. The available marker genes are 16S, ITS and 18S.
+

 .. class:: infomark page-header h2

@@ -118,16 +128,16 @@
 Input


-**Sequence file** (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_):
+**-Sequence file-** (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_):

- The OTUs fasta sequence file.
+ The ASVs fasta sequence file.

-**Biom file** (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_):
+**-Biom file-** (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_):

- The OTUs biom file. Taxonomic affiliations must be done before (bion file form FROGS Affiliation OTU tool).
+ The ASVs biom file. Taxonomic affiliations must be done before (biom file from FROGS taxonomic_affiliation tool).


-**Placement tool**
+**-Placement tool-**

  EPA-NG or SEPP. SEPP is a low-memory alternative to EPA-ng for placing sequences.

@@ -143,66 +153,88 @@

 Outputs

-**Newick file** (frogsfunc_placeseqs_tree.nwk):
+**-Tree file-** (FROGSFUNC_1_placeseqs_copynumber_tree.nwk):

  The phylogenetic tree output with insert sequences into the reference tree (format: newick). (format `nhx &lt;https://en.wikipedia.org/wiki/Newick_format&gt;`_).

-**Excluded sequence file** (frogsfunc_placeseqs_excluded.tsv):
+**-Excluded sequence file-** (FROGSFUNC_1_placeseqs_copynumber_excluded.tsv):

  List of sequences not inserted in the PICRUSt2 reference tree. Sequences are excluded if the total length of the input sequence aligned against reference sequence is less than the specified " Minimum alignment length " threshold (0.8 per default). These sequences are excluded for the next steps.

-**Closests reference sequences file** (frogsfunc_placeseqs_closests_ref_sequences.txt):
+**-Closest reference sequences file-** (FROGSFUNC_1_placeseqs_copynumber_closests_ref_sequences.txt):

  Information on the sequences from the PICRUST2 reference tree that are the closest neighbours of your studied sequences.

-**File of sequences placed in the phylogenetic tree** (frogsfunc_placeseqs.fasta)
+**-File of sequences placed in the phylogenetic tree-** (FROGSFUNC_1_placeseqs_copynumber.fasta):

  Sequence file of inserted sequences into PICRUST2 reference tree.

-**Abundance table** (frogsfunc_placeseqs.biom)
+**-Abundance table-** (FROGSFUNC_1_placeseqs_copynumber.biom):

- OTU abundance biom file of inserted sequences into reference tree.
+Biom file of ASV abundances with inserted sequences into PICRUSt2 reference tree.
+
+**-Marker copy number file-**:

+.. image:: FROGS_frogsfunc_copynumbers_marker.png

-**Report file** (report.html):
+This is the output table of predicted marker gene copy numbers per sequence (frogsfunc_marker.tsv).
+Marker gene copy number prediction is used to normalize the counting matrices of metabolic functions (inside FROGSFUNC_2_functions). For example, if the ASV has two 16S copies, the abundance of metabolic function for this ASV will be divided by 2.
+
+**-Report file-** (report.html):

- The report file describes which OTUs are contained or not in the phylogenetic tree. Note that PICRUSt2 uses its own reference tree to affiliate OTUs/ASVs from reference sequences. The report file indicates for each OTU/ASV which is the closest PICRUSt2 reference sequence, and compares it to the original FROGS taxonomy. Clicking on the sequence ID gives you more information about it (`JGI database &lt;https://img.jgi.doe.gov/&gt;`_).
+The report file describes which ASVs are contained or not in the phylogenetic tree. Note that PICRUSt2 uses its own reference tree to affiliate ASVs from reference sequences. The report file indicates for each ASV which is the closest PICRUSt2 reference sequence, and compares it to the original FROGS taxonomy. Clicking on the sequence ID gives you more information about it (`JGI database &lt;https://img.jgi.doe.gov/&gt;`_).

 .. image:: FROGS_frogsfunc_placeseqs_piecharts.png
 	:height: 290
 	:width: 676


-The pie charts describe the proportion of number of OTUs/ASVs excluded and the proportion of total sequences excluded for the following steps.
-OTUs/ASVs are excluded if the total length of the input sequence aligned against reference sequence is less than the specified " Minimum alignment length " threshold parameter.
+The pie charts describe the proportion of number of excluded ASVs and the proportion of total excluded sequences for the following steps.
+ASVs are excluded if the total length of the input sequence aligned against reference sequence is less than the specified " Minimum alignment length " threshold parameter.

 .. image:: FROGS_frogsfunc_placeseqs_table_JGI.png
-	:height: 580
-	:width: 1220
+	:height: 792
+	:width: 1599

-* **Cluster** : OTU name.
+* **ASV** : ASV name.

-* **Nb sequences** : OTU sequence abundances.
+* **Nb sequences** : ASV sequence abundances.

-* **FROGS Taxonomy** : Taxonomic affiliation made by FROGS (FROGS Affiliation OTU).
+* **FROGS Taxonomy** : Taxonomic affiliation made by FROGS (FROGS taxonomic_affiliation).

-* **PICRUSt2 closest ID (JGI)** : Identifiant (JGI) of the closest reference sequence from the OTU inserted in the reference tree (see the explanatory illustration at the bottom of this page).
+* **PICRUSt2 closest ID (JGI)** : Identifier (JGI) of the closest reference sequence from the inserted ASV in the reference tree (see the explanatory illustration at the bottom of this page).

 * **PICRUSt2 closest reference name** : Genome Name / Sample Name.

-* **PICRUSt2 closest taxonomy** : Taxonomy (JGI) of the closest reference sequence from the OTU inserted in the reference tree under the following format: Kingdom;Phylum;Class;Order;Family;Genus;Species
+* **PICRUSt2 closest taxonomy** : Taxonomy (JGI) of the closest reference sequence from the ASV inserted in the reference tree under the following format: Kingdom;Phylum;Class;Order;Family;Genus;Species

-* **NSTI** : Nearest Sequenced Taxon Index (`NSTI &lt;https://www.nature.com/articles/nbt.2676&gt;`_) is the phylogenetic distance between the OTU and the nearest sequenced reference genome. This metric can be used to identify OTUs that are highly distant from all reference sequences (the predictions for these sequences are less reliable!). The higher the NSTI score, the less the affiliations are relevant. Any OTUs with a NSTI value higher than 2 are typically either from uncharacterized phyla or off-target sequences.
+* **NSTI** : Nearest Sequenced Taxon Index (`NSTI &lt;https://www.nature.com/articles/nbt.2676&gt;`_) is the phylogenetic distance between the ASV and the nearest sequenced reference genome. This metric can be used to identify ASVs that are highly distant from all reference sequences (the predictions for these sequences are less reliable!). The higher the NSTI score, the less the affiliations are relevant. Any ASVs with a NSTI value higher than 2 are typically either from uncharacterized phyla or off-target sequences.

 * **NSTI confidence** : According to the NSTI score, we guide you in the confidence you can bring to the issue affiliation of PICRUSt2.

 * **Lowest same taxonomic rank between FROGS and PICRUSt2** : Comparison between FROGS and PICRUSt2 taxonomic affiliations. Lowest common taxonomic rank between FROGS and PICRUSt2 affiliations.

-* **Comment** : " identical taxonomy " if the FROGS and PICRUSt2 taxonomic affiliations are identical. " identical sequence " if the OTU sequence is strictly the same as the reference sequence.
+* **Comment** : " identical taxonomy " if the FROGS and PICRUSt2 taxonomic affiliations are identical. " identical sequence " if the ASV sequence is strictly the same as the reference sequence.

 .. image:: FROGS_frogsfunc_placeseqs_closest_explained.png
+    :height: 269
+    :width: 825
+
+Closest reference sequence (from JGI database) from one ASV sequence.

-Closest reference sequence (from JGI database) from one cluster sequence.
+.. image:: FROGS_frogsfunc_placeseqs_nsti.png
+    :height: 295
+    :width: 778
+
+Nearest Sequenced Taxon Index (NSTI) is the phylogenetic distance between the ASV and the nearest sequenced reference genome. This metric can be used to identify ASVs that are highly distant from all reference sequences (the predictions for these sequences are less reliable). The higher the NSTI score, the less the affiliations are relevant. Any ASV with a NSTI value higher than 2 is typically either from uncharacterized phyla or an off-target sequence.
+
+The graph shows the number of kept ASVs and sequences according to the NSTI threshold. It is a decision support graphic to help choose the NSTI threshold, which you will be asked to set in the next tool, FROGSFUNC_2_functions. A good practice is to choose the lowest NSTI threshold that still retains a good number of sequences, i.e. one ensuring that the taxonomies derived from FROGS and PICRUSt2 do not diverge too much.
+
+.. image:: FROGS_frogsfunc_placeseqs_blast.png
+    :height: 295
+    :width: 920
+
+The graph depicts the blast percentages of identity and coverage against the closest PICRUSt2 sequence (ordinate), against the NSTI score (abscissa). Thus, the ASVs with the best predictions will be located at the top left of the graph.

 @HELP_CONTACT@
--- a/itsx.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/itsx.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -9,7 +9,6 @@
     <command detect_errors="exit_code">
 	itsx.py
 	    @CPUS@
-	    --region '$region'
 	    --input-fasta '$input_fasta'
 	    --input-biom '$input_biom'
 	    --out-fasta '$out_fasta'
@@ -17,17 +16,34 @@
 	    --out-removed '$out_excluded'
 	    --summary '$summary_file'
 	    --organism-groups '$organism_groups'
-	    $trim_sequence
+
+        #if $trim_sequence.check_its_only == "yes"
+            --check-its-only
+       #else
+         --region $trim_sequence.region
+       #end if
+
     </command>
     <inputs>
         <!-- Files -->
         <param format="fasta" name="input_fasta" type="data" label="Sequence file" help="The sequence file to filter (format: FASTA)." />
         <param format="biom1" name="input_biom" type="data" label="Abundance file" help="The abundance file to filter (format: BIOM)" />
-        <param argument="--region" type="select" label="ITS region" help="Which fungal ITS region is targeted: either ITS1 or ITS2 ?">
-            <option value="ITS1">ITS1</option>
-            <option value="ITS2">ITS2</option>
-        </param>
-        <param argument="--check-its-only" name="trim_sequence" type="boolean" checked="false" truevalue="" falsevalue="--check-its-only" label="Trim conserved sequence (SSU, 5.8S, LSU) ?" help="If Yes, only part of the sequences with ITS signature will be kept, SSU, LSU or 5.8S regions will be trimmed (default : No)" />
+
+        <conditional name="trim_sequence">
+			<param argument="--check-its-only" type="select" label="Trim conserved sequence (SSU, 5.8S, LSU) ?" help="If Yes, only part of the sequences with ITS signature will be kept, SSU, LSU or 5.8S regions will be trimmed (default : No)" display="radio">
+            	<option value="yes" selected="true">No, keep conserved regions</option>
+            	<option value="no" >Yes, trim conserved regions</option>
+
+			</param>
+		<when value="yes"/>
+		<when value="no">
+			<param argument="--region" type="select" label="ITS region" help="Which fungal ITS region is targeted: either ITS1 or ITS2 ?" display="radio">
+                <option value="ITS1">ITS1</option>
+                <option value="ITS2">ITS2</option>
+            </param>
+		</when>
+		</conditional>
+
         <param argument="--organism-groups" type="select" multiple="true" display="checkboxes" label="Choose pertinent organisms to scan:" help="Save a lot of time by checking pertinent organism group model to scan">
             <option value="F" selected="true">Fungi</option>
             <option value="A">Alveolata</option>
--- a/macros.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/macros.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -24,6 +24,7 @@
     <xml name="requirements_frogsfunc">
         <expand macro="requirements">
             <requirement type="package" version="2.5.1">picrust2</requirement>
+            <requirement type="package" version="3.1.2">ete3</requirement>
             <yield />
         </expand>
     </xml>
@@ -89,7 +90,7 @@
                 pages = {1287-1294},
                 year = {2018},
                 month = {04},
-                abstract = "{Metagenomics leads to major advances in microbial ecology and biologists need user friendly tools to analyze their data on their own.This Galaxy-supported pipeline, called FROGS, is designed to analyze large sets of amplicon sequences and produce abundance tables of Operational Taxonomic Units (OTUs) and their taxonomic affiliation. The clustering uses Swarm. The chimera removal uses VSEARCH, combined with original cross-sample validation. The taxonomic affiliation returns an innovative multi-affiliation output to highlight databases conflicts and uncertainties. Statistical results and numerous graphical illustrations are produced along the way to monitor the pipeline. FROGS was tested for the detection and quantification of OTUs on real and in silico datasets and proved to be rapid, robust and highly sensitive. It compares favorably with the widespread mothur, UPARSE and QIIME.Source code and instructions for installation: https://github.com/geraldinepascal/FROGS.git. A companion website: http://frogs.toulouse.inra.fr.Supplementary data are available at Bioinformatics online.}",
+                abstract = "{Metagenomics leads to major advances in microbial ecology and biologists need user friendly tools to analyze their data on their own.This Galaxy-supported pipeline, called FROGS, is designed to analyze large sets of amplicon sequences and produce abundance tables of ASVs and their taxonomic affiliation. The clustering uses Swarm. The chimera removal uses VSEARCH, combined with original cross-sample validation. The taxonomic affiliation returns an innovative multi-affiliation output to highlight databases conflicts and uncertainties. Statistical results and numerous graphical illustrations are produced along the way to monitor the pipeline. FROGS was tested for the detection and quantification of ASVs on real and in silico datasets and proved to be rapid, robust and highly sensitive. It compares favorably with the widespread mothur, UPARSE and QIIME.Source code and instructions for installation: https://github.com/geraldinepascal/FROGS.git. A companion website: http://frogs.toulouse.inra.fr.Supplementary data are available at Bioinformatics online.}",
                 issn = {1367-4803},
                 doi = {10.1093/bioinformatics/btx791},
                 url = {https://doi.org/10.1093/bioinformatics/btx791},
--- a/normalisation.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/normalisation.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -16,7 +16,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -->
 <tool id="FROGS_normalisation" name="FROGS Abundance normalisation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
-        <description>Normalise OTU abundance.</description>
+        <description>Normalise ASV abundance.</description>

     <macros>
         <import>macros.xml</import>
@@ -36,8 +36,8 @@
 		                       #if $sampling_method.sampling_by_min == "yes"
 		                       	 --sampling-by-min
 		                       #else
-		                         --num-reads $sampling_method.num_reads
-		                       	 #if $sampling_method.delete_samples
+		                         --num-reads '$sampling_method.num_reads'
+		                       	 #if $sampling_method.delete_samples == "true"
 		                           --delete-samples
 		                         #end if
 		                       #end if
@@ -46,16 +46,20 @@
 		                       --summary-file '$summary_file'
 	</command>
 	<inputs>
-		<param format="fasta" name="input_fasta" type="data" label="Sequence file" help="Sequence file to normalise (format: fasta)." />
-		<param format="biom1" name="input_biom" type="data" label="Abundance file" help="Abundance file to normalise (format: BIOM)." />
+		<param format="fasta" argument="--input-fasta" type="data" label="Sequence file" help="Sequence file to normalise (format: fasta)." />
+		<param format="biom1" argument="--input-biom" type="data" label="Abundance file" help="Abundance file to normalise (format: BIOM)." />
 		<conditional name="sampling_method">
-			<param name="sampling_by_min" type="select" label="Sampling method" help='Sampling by the number of sequences of the smallest sample, or select a number manually' display='radio'>
+			<param argument="--sampling-by-min" type="select" label="Sampling method" help="Sampling by the number of sequences of the smallest sample, or select a number manually" display="radio">
             	<option value="yes" selected="true">Sampling by the number of sequences of the smallest sample</option>
             	<option value="no">Select a number of sequences</option>
 			</param>
+		<when value="yes"/>
 		<when value="no">
-			<param name="num_reads" type="integer" optional="true" min="1" value="" label="Number of reads" help="The final number of reads per sample." />
-			<param name="delete_samples" type="boolean" label="Remove samples that have an initial number of reads below the number of reads to sample ?"  />
+			<param argument="--num-reads" type="integer" optional="true" min="1" value="" label="Number of reads" help="The final number of reads per sample." />
+			<param argument="--delete-samples" type="select" label="Remove samples that have an initial number of reads below the number of reads to sample ?" display="radio">
+                <option value="false">No, the subsampling threshold must be at most equal to the smallest sample</option>
+            	<option value="true">Yes, subsampling threshold may be greater than the smallest sample</option>
+            </param>
 		</when>
 		</conditional>
 	</inputs>
@@ -101,7 +105,7 @@

 **Abundance file**:

-The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+The abundance of each ASV in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).

 **Sampling method**:
--- a/phyloseq_alpha_diversity.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/phyloseq_alpha_diversity.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -71,7 +71,7 @@
 Input

 **Data file** (format RData):
-One phyloseq object containing OTU abundance table, their taxonomies (and optionnaly a phylogenetics tree, and the sample experiment metadata.
+One phyloseq object containing ASV abundance table, their taxonomies and optionally a phylogenetic tree, and the sample experiment metadata.
 This file is the result of "FROGS Phyloseq Import Data tool".

 .. class:: h3
--- a/phyloseq_beta_diversity.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/phyloseq_beta_diversity.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -78,7 +78,7 @@
 Input

 **data file** (format RData):
-One phyloseq object containing OTU abundance table, their taxonomies and optionnaly a phylogenetic tree, and the sample experiment metadata.
+One phyloseq object containing ASV abundance table, their taxonomies and optionally a phylogenetic tree, and the sample experiment metadata.
 This file is the result of "FROGS Phyloseq Import Data tool".

 **distance methods** :
--- a/phyloseq_composition.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/phyloseq_composition.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -69,7 +69,7 @@

 What it does

-Using `phyloseq &lt;https://joey711.github.io/phyloseq/&gt;`_ and custom R function, this tool constructs two plots to visualise the sample composition: one at the OTU level and another one at the specified aggregation level (*e.g.* Phylum) after keeping only a subset of taxa (*e.g.* Bacteria at the level Kingdom). It helps answer the question: "What is the composition at the Phylum level within Bacteria?". By default, the plot exhibits only the abundance of the 9 most abundant taxa (as specified). In general, the representation of more than 10 taxa is hard to read on plots.
+Using `phyloseq &lt;https://joey711.github.io/phyloseq/&gt;`_ and custom R function, this tool constructs two plots to visualise the sample composition: one at the ASV level and another one at the specified aggregation level (*e.g.* Phylum) after keeping only a subset of taxa (*e.g.* Bacteria at the level Kingdom). It helps answer the question: "What is the composition at the Phylum level within Bacteria?". By default, the plot exhibits only the abundance of the 9 most abundant taxa (as specified). In general, the representation of more than 10 taxa is hard to read on plots.


 .. class:: infomark page-header h2
@@ -81,7 +81,7 @@
 Input

 **data file** (format rdata):
-One phyloseq object containing the OTU abundance table, their taxonomies and optionnaly a phylogenetic tree, and the sample experiment metadata.
+One phyloseq object containing the ASV abundance table, their taxonomies and optionally a phylogenetic tree, and the sample experiment metadata.
 this file is the result of FROGS Phyloseq Import Data tool.

 .. class:: h3
@@ -90,20 +90,20 @@

 **html file** (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_): data composition plots.

-Bar plot of OTUs is colored with aggregated taxonomic level *i.e* "Phylum" :
+Bar plot of ASVs is colored with aggregated taxonomic level *i.e* "Phylum" :

  .. image:: FROGS_Phyloseq_bar_plot.png
      :height: 646
      :width: 800

-Composition plot: plot the most abundant sub taxonomic level among a selection of OTUs.
+Composition plot: plot the most abundant sub taxonomic level among a selection of ASVs.

-- Selection of OTUs:
+- Selection of ASVs:

     - Taxonomic level name to subset: Kingdom
     - Taxon name: Bacteria

-- Aggregation of OTUs :
+- Aggregation of ASVs :

     - Taxonomic level used to agglomerate: Phylum
--- a/phyloseq_import_data.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/phyloseq_import_data.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -12,7 +12,7 @@
                 --treefile '$treefile'
             #end if
             --ranks $ranks
-            #if $normalisation
+            #if $normalisation == "true"
                 --normalisation
             #end if
             --html '$html'
@@ -20,18 +20,21 @@
     </command>
     <inputs>
         <!-- Files -->
-        <param format="biom1" argument="--biomfile" type="data" label="Abundance biom file with taxonomical metadata (format: BIOM)" help="The file contains the  OTU informations "/>
-        <param format="tabular,tsv" argument="--samplefile" type="data" label="Sample file (format: TSV)" help="The file contains the samples information."/>
-        <param format="nhx" argument="--treefile" type="data" optional="true" label="Tree file (format: Newick)" help="The file contains the tree information (optional)"/>
+        <param format="biom1" argument="--biomfile" type="data" label="Abundance biom file with taxonomical metadata (format: BIOM)" help="The file contains the  ASV information "/>
+        <param format="tabular,tsv" argument="--samplefile" type="data" label="Metadata associated to samples (format: TSV)" help="The file contains the metadata that characterise each sample."/>
+        <param format="nhx" argument="--treefile" type="data" optional="true" label="Taxonomic tree file (format: Newick)" help="The file contains the taxonomic tree information from FROGS Tree tool (optional)"/>
         <!-- Parameters -->
         <param argument="--ranks" type="text" value="Kingdom Phylum Class Order Family Genus Species"
             label="Names of taxonomic levels" help="The ordered taxonomic levels stored in BIOM. Each level is separated by one space">
             <expand macro="sanitizer_validator"/>
         </param>
-        <param argument="--normalisation" type="boolean" label="Do you want to normalise your data ?" help="To normalise data before statistical analysis (default : No)"/>
+        <param argument="--normalisation" type="select" label="Do you want to normalise your data ?" help="To normalise data before statistical analysis (default : No)" display="radio">
+            <option value="false">No, keep abundance as it is.</option>
+            <option value="true">Yes, subsample abundances to the smallest sample size.</option>
+        </param>
     </inputs>
     <outputs>
-        <data format="rdata" name="data" label="${tool.name}: data.Rdata" from_work_dir="data.Rdata"/>
+        <data format="rdata" name="data" label="${tool.name}: asv_data.Rdata" from_work_dir="asv_data.Rdata"/>
         <data format="html" name="html" label="${tool.name}: report.nb.html" from_work_dir="report.nb.html"/>
     </outputs>
     <tests>
@@ -79,9 +82,9 @@

 Input

-**OTU biom file**:
+**ASV abundance file (in biom format)**:

-The OTU biom file (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_).
+The ASV biom file (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_).
 The example of biom file:

   .. image:: FROGS_Phyloseq_biomfile.png
@@ -94,7 +97,7 @@

   .. image:: FROGS_nwk_treefile.png

-**Sample file**:
+**Metadata file**:
 The file contains the conditions of experiment with sample ID in the first column:

   .. image:: FROGS_Phyloseq_samplefile.png
@@ -105,11 +108,11 @@

 Output

-**Html file** (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_): The summary of phyloseq object.
+**Html report file** (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_): The summary of phyloseq object.

   .. image:: FROGS_Phyloseq_import_data_html.png

-**Data file** (format rdata): The information of data in one phyloseq object.
+**Data file** (format Rdata): The information of data in one phyloseq object.


 .. class:: infomark page-header h2
--- a/preprocess.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/preprocess.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -15,7 +15,7 @@
         --min-amplicon-size $sequencer_type.min_amplicon_size
         --max-amplicon-size $sequencer_type.max_amplicon_size
         #if $sequencer_type.sequencer_selected in ('illumina', 'longreads')
-            #if $sequencer_type.is_primer_in_seq.primer_choice
+            #if $sequencer_type.is_primer_in_seq.primer_choice == "true"
                 --five-prim-primer '$sequencer_type.is_primer_in_seq.five_prim_primer'
                 --three-prim-primer '$sequencer_type.is_primer_in_seq.three_prim_primer'
             #else
@@ -38,7 +38,7 @@
                 #if $sequencer_type.input_type.archive_type.merge_software_type.merge_software == "flash"
                     --expected-amplicon-size $sequencer_type.input_type.archive_type.merge_software_type.expected_amplicon_size
                 #end if
-                #if $sequencer_type.input_type.archive_type.keep_unmerged
+                #if $sequencer_type.input_type.archive_type.keep_unmerged == "Yes"
                     --keep-unmerged
                 #end if
             #end if
@@ -67,7 +67,7 @@
                     #if $sequencer_type.input_type.files_by_samples_type.merge_software_type.merge_software == "flash"
                         --expected-amplicon-size $sequencer_type.input_type.files_by_samples_type.merge_software_type.expected_amplicon_size
                     #end if
-                    #if $sequencer_type.input_type.files_by_samples_type.keep_unmerged
+                    #if $sequencer_type.input_type.files_by_samples_type.keep_unmerged == "Yes"
                         --keep-unmerged
                     #end if
                 #end if
@@ -119,7 +119,10 @@
                                     </when>
                                     <when value="vsearch"></when>
                                 </conditional>
-                                <param argument="--keep-unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No = Unmerged reads will be excluded; Yes = unmerged reads will be artificially combined with 100 N. (default No)" />
+                                <param argument="--keep-unmerged" type="select" label="Would you like to keep unmerged reads?" help="No = Unmerged reads will be excluded; Yes = unmerged reads will be artificially combined with 100 N. (default No)" display="radio">
+                                    <option value="No" selected="true">No, unmerged reads will be excluded.</option>
+                                    <option value="Yes">Yes, unmerged reads will be artificially combined.</option>
+                                </param>
                             </when>
                             <when value="already_merged"></when>
                         </conditional>
@@ -153,7 +156,10 @@
                                     </when>
                                     <when value="vsearch"></when>
                                 </conditional>
-                                <param argument="--keep-unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No = Unmerged reads will be excluded; Yes = unmerged reads will be artificially combined with 100 N. (default No)" />
+                                <param argument="--keep-unmerged" type="select" label="Would you like to keep unmerged reads?" help="No = Unmerged reads will be excluded; Yes = unmerged reads will be artificially combined with 100 N. (default No)" display="radio">
+                                    <option value="No" selected="true">No, unmerged reads will be excluded.</option>
+                                    <option value="Yes">Yes, unmerged reads will be artificially combined.</option>
+                                </param>
                             </when>
                             <when value="already_merged">
                                 <repeat name="samples" title="Samples" min="1">
@@ -171,7 +177,10 @@
                 <param argument="--max-amplicon-size" type="integer" value="" label="Maximum amplicon size" help="The maximum size of the amplicons (with primers)"/>
                 <!-- Primers -->
                 <conditional name="is_primer_in_seq">
-                    <param name="primer_choice" type="boolean" checked="true" label="Do the sequences have PCR primers?" help=""/>
+                    <param name="primer_choice" type="select" label="Do the sequences have PCR primers?" help="" display="radio">
+                        <option value="true" selected="true">Yes</option>
+                        <option value="false">No</option>
+                    </param>
                     <when value="true">
                         <param argument="--five-prim-primer" type="text"  label="5' primer" help="The 5' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
                             <sanitizer invalid_char="">
@@ -216,7 +225,10 @@

                 <!-- Primers -->
                 <conditional name="is_primer_in_seq">
-                    <param name="primer_choice" type="boolean" checked="true" label="Do the sequences have PCR primers?" help=""/>
+                    <param name="primer_choice" type="select" label="Do the sequences have PCR primers?" help="" display="radio">
+                        <option value="true" selected="true">Yes</option>
+                        <option value="false">No</option>
+                    </param>
                     <when value="true">
                         <param argument="--five-prim-primer" type="text"  label="5' primer" help="The 5' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
                             <sanitizer invalid_char="">
Binary file static/images/FROGS_DESeq2_html_ipath.png has changed
Binary file static/images/FROGS_DESeq2_html_table.png has changed
Binary file static/images/FROGS_Phyloseq_biomfile.png has changed
Binary file static/images/FROGS_Phyloseq_plot_heatmap_red.png has changed
Binary file static/images/FROGS_affiliation_filter_ignore.png has changed
Binary file static/images/FROGS_affiliation_filter_keep.png has changed
Binary file static/images/FROGS_affiliation_stats_alignment.png has changed
Binary file static/images/FROGS_affiliation_stats_bootstrap.png has changed
Binary file static/images/FROGS_affiliation_summary.png has changed
Binary file static/images/FROGS_cluster_fastidious.png has changed
Binary file static/images/FROGS_frogsfunc_copynumbers_nsti.png has changed
Binary file static/images/FROGS_frogsfunc_functions_starplot.png has changed
Binary file static/images/FROGS_frogsfunc_placeseqs_blast.png has changed
Binary file static/images/FROGS_frogsfunc_placeseqs_closest_explained.png has changed
Binary file static/images/FROGS_frogsfunc_placeseqs_nsti.png has changed
Binary file static/images/FROGS_frogsfunc_placeseqs_table_JGI.png has changed
Binary file static/images/FROGS_nwk_treefile.png has changed
Binary file static/images/FROGS_tree_otufile.png has changed
Binary file static/images/starplot_frogsfrunc_function.png has changed
--- a/taxonomic_affiliation.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/taxonomic_affiliation.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -1,5 +1,5 @@
 <tool id="FROGS_taxonomic_affiliation" name="FROGS_5 Taxonomic affiliation" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-    <description>Taxonomic affiliation of each OTU's seed by RDPtools and BLAST</description>
+    <description>Taxonomic affiliation of each ASV's seed by RDPtools and BLAST</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -19,7 +19,7 @@
         --summary '$summary'
         @CPUS@
         --java-mem \$GALAXY_MEMORY_GB
-        #if $rdp
+        #if $rdp == "yes"
           --rdp
         #end if
     ]]></command>
@@ -29,7 +29,10 @@
             <options from_data_table="frogs_db"/>
             <validator type="no_options" message="A built-in database is not available"/>
         </param>
-        <param argument="--rdp" type="boolean" label="Also perform RDP assignation?" help="Taxonomy affiliation will be perform thanks to Blast. This option allows to perform it also with RDP classifier tool (default No)"/>
+        <param argument="--rdp" type="select" display="radio" label="Also perform RDP assignation?" help="Taxonomic affiliation will be performed with Blast. This option allows you to also perform it with the RDP classifier tool (default No)">
+            <option value="yes">Yes</option>
+            <option value="no" selected="true">No</option>
+        </param>
         <expand macro="taxonomic_ranks"/>
         <!-- Files -->
         <param format="fasta" name="fasta_sequences" type="data" label="Sequence file" help="The sequences to affiliated (format: FASTA)"/>
@@ -44,7 +47,7 @@
             <param name="ref_file" value="ITS1_test"/>
             <param name="fasta_sequences" value="references/04-filters.fasta"/>
             <param name="biom_abundance" value="references/04-filters.biom"/>
-            <param name="rdp" value="true"/>
+            <param name="rdp" value="yes"/>
             <output name="biom_affiliation" file="references/06-affiliation.biom" compare="sim_size" delta="5"/>
             <output name="summary" file="references/06-affiliation.html" compare="diff" lines_diff="0"/>
         </test>
@@ -57,7 +60,7 @@

 What it does

-this tool adds taxonomic affiliation in abundance file.
+This tool adds taxonomic affiliations to the abundance file.


 .. class:: infomark page-header h2
@@ -74,7 +77,7 @@

 **Abundance file**:

-The abundance of each OTU in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).
+The abundance of each ASV in each sample (format `BIOM &lt;http://biom-format.org/&gt;`_).

 .. class:: h3

@@ -82,11 +85,11 @@

 **Abundance file** (tax_affiliation.biom):

- The abundance file with affiliation (format `BIOM &lt;http://biom-format.org/&gt;`_).
+ The abundance file with affiliations (format `BIOM &lt;http://biom-format.org/&gt;`_).

 **Report file** (report.html):

- This file presents the number of sequences affiliated by blast, and the number of multi-affiliation (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).
+ This file presents the number of sequences affiliated by blast, and the number of multi-affiliations (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_).

  .. image:: FROGS_affiliation_summary.png
    :height: 975
@@ -96,7 +99,7 @@

 Reference database

-All the databases we format (on demand) for RDPClassifier and NCBI Blast+ are inventoried here: http://genoweb.toulouse.inrae.fr/frogs_databanks/assignation/readme.txt
+All the available databases (on demand to frogs-support@inrae.fr) for RDPClassifier and NCBI Blast+ are listed here: http://genoweb.toulouse.inrae.fr/frogs_databanks/assignation/readme.txt

 .. class:: infomark page-header h2

@@ -107,9 +110,9 @@
    :widths: 5, 150
    :class: table table-striped

-   "1", "`RDPClassifier &lt;http://rdp.cme.msu.edu/classifier/classifier.jsp&gt;`_ may be used with database to associate to each OTU a taxonomy and a bootstrap (example: *Bacteria;(1.0);Firmicutes;(1.0);Clostridia;(1.0);Clostridiales;(1.0);Clostridiaceae 1;(1.0);Clostridium sensu stricto;(1.0);*)."
-   "2", "`blastn+ &lt;https://blast.ncbi.nlm.nih.gov/Blast.cgi&gt;`_ or `needlall &lt;http://emboss.sourceforge.net/apps/release/6.6/emboss/apps/needleall.html&gt;`_ is used to find alignment between each OTU and the database. Only the bests hits with the same score are reported. blastn+ is used for merged read pair, and needall is used for artificially combined sequence. For each alignment returned, several metrics are computed: identity percentage, coverage percentage, and alignment length"
-   "3", "For each OTU with several blastn+/needlall alignment results a consensus is determined on each taxonomic level. If all the taxa at a taxonomic rank are identical the taxon name is reported otherwise *Multi-affiliation* is reported. For example, if you have an OTU with two equivalent hits, associated to *Bacteria;Proteobacteria;Gamma Proteobacteria;Enterobacteriales*, and *Bacteria;Proteobacteria;Beta Proteobacteria;Methylophilales*, the consensus will be *Bacteria;Proteobacteria;Multi-affiliation;Multi-affiliation*."
+   "1", "`RDPClassifier &lt;http://rdp.cme.msu.edu/classifier/classifier.jsp&gt;`_ may be used with database to associate to each ASV a taxonomy and a bootstrap (example: *Bacteria;(1.0);Firmicutes;(1.0);Clostridia;(1.0);Clostridiales;(1.0);Clostridiaceae 1;(1.0);Clostridium sensu stricto;(1.0);*)."
+   "2", "`blastn+ &lt;https://blast.ncbi.nlm.nih.gov/Blast.cgi&gt;`_ or `needleall &lt;http://emboss.sourceforge.net/apps/release/6.6/emboss/apps/needleall.html&gt;`_ is used to find alignments between each ASV and the database. Only the best hits with the same score are reported. blastn+ is used for merged read pairs, and needleall is used for artificially combined sequences. For each alignment returned, several metrics are computed: identity percentage, coverage percentage, and alignment length."
+   "3", "For each ASV with several blastn+/needleall alignment results, a consensus is determined at each taxonomic level. If all the taxa at a taxonomic rank are identical, the taxon name is reported; otherwise *Multi-affiliation* is reported. For example, if you have an ASV with two equivalent hits, associated to *Bacteria;Proteobacteria;Gamma Proteobacteria;Enterobacteriales*, and *Bacteria;Proteobacteria;Beta Proteobacteria;Methylophilales*, the consensus will be *Bacteria;Proteobacteria;Multi-affiliation;Multi-affiliation*."

 .. class:: infomark page-header h2

@@ -120,18 +123,18 @@
 * **Case 1: a sequencing of overlapping sequences i.e. 16S V3-V4 amplicon MiSeq sequencing**

 .. image:: FROGS_affiliation_overlapped_percent_id.png
-    :height: 325
-    :width: 807
+    :height: 198
+    :width: 604

 * **Case 2 : a sequencing of non-overlapping sequences: case of ITS1 amplicon MiSeq sequencing**

 .. image:: FROGS_affiliation_combined_percent_id.png
-    :height: 310
-    :width: 887
+    :height: 232
+    :width: 664

 **- Finally, how percentage identity is computed ?**

-With the classical method of %id calculation, filtering on %id will systematically removed “FROGS combined” OTUs. So, we proposed to replace the classical %id by a %id computed on the sequenced bases only.
+With the classical method of %id calculation, filtering on %id would systematically remove “FROGS combined” ASVs. So, we propose to replace the classical %id with a %id computed on the sequenced bases only.

 .. image:: FROGS_affiliation_percent_id_formula.png
     :height: 36
@@ -154,9 +157,9 @@

 Advices

-This tool can take large time. It is recommended to filter your OTU abundance and sequence files before this tool (see **FROGS OTU Filters**).
+This can be a long process. It is recommended to filter your ASV abundances and sequences before this step of taxonomic affiliation (see **FROGS cluster Filters**).

-As you can see the affiliation of each OTU is not human readable in outputed abundance file. We provide a tools to convert these BIOM file in tabulated file, see the **FROGS BIOM to TSV** tool.
+As you can see, the affiliation of each ASV is not human-readable in the output abundance file. We provide a tool to convert this BIOM file into a tabulated file, see the **FROGS BIOM to TSV** tool.


 @HELP_CONTACT@
--- a/tool-data/frogs_picrust2_default_dir.loc.sample	Fri Mar 10 14:03:08 2023 +0000
+++ b/tool-data/frogs_picrust2_default_dir.loc.sample	Thu Mar 30 06:09:01 2023 +0000
@@ -25,7 +25,7 @@
 #
 #The indicated path is the one if you do not change the default galaxy config of conda directory.
 #
-#<identifiant>	<marker_gene>	<path_to_default_dir>
+#<identifier>	<marker_gene>	<path_to_default_dir>	<path_to_marker_copy_numbers>	<placement_tool>
 #
 #default dir must contain these files:
 #
@@ -36,6 +36,7 @@
 #-rwxrwxr-x 1 vdarbot vdarbot   600048 avril  1  2021 pro_ref.tre
 #
 # EXAMPLE FOR TEST :
-#picrust2_default_dir_16S	16S	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/prokaryotic/pro_ref/
-#picrust2_default_dir_ITS	ITS	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_ITS/
-#picrust2_default_dir_18S	18S	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_18S/
+#picrust2_default_dir_16S	16S	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/prokaryotic/pro_ref/	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/prokaryotic/pro_ref/16S_counts.txt.gz	epa-ng
+#picrust2_default_dir_16S	16S	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/prokaryotic/pro_ref/	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/prokaryotic/pro_ref/16S_counts.txt.gz	sepp
+#picrust2_default_dir_ITS	ITS	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_ITS/	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_ITS/ITS_counts.txt.gz	epa-ng
+#picrust2_default_dir_18S	18S	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_18S/	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_18S/18S_counts.txt.gz	epa-ng
--- a/tree.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/tree.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -52,7 +52,7 @@

 What it does

-This tool creates a multiple alignment of OTUs with `Mafft &lt;http://mafft.cbrc.jp/alignment/software&gt;`_.
+This tool creates a multiple alignment of ASVs with `Mafft &lt;http://mafft.cbrc.jp/alignment/software&gt;`_.
 And creates a rooted phylogenetic tree with `FastTree &lt;http://www.microbesonline.org/fasttree/&gt;`_ and `Phangorn R package &lt;https://cran.r-project.org/web/packages/phangorn/index.html&gt;`_.

 .. class:: infomark page-header h2
@@ -65,14 +65,14 @@

 **Fasta file**:

-The OTU sequence file (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
+The ASV sequence file (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
 Careful: FROGS Tree works only with less than 10 000 sequences!

  .. image:: FROGS_tree_otufile.png

 **Biom file**:

-The OTUs biom file (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_).
+The ASVs biom file (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_).
 This file can be obtained in particular with the FROGS pipeline.

 .. class:: h3
@@ -87,7 +87,7 @@

 **Report file** (report.html):

-The report file describing which OTUs are contained or not in the phylogenetic tree.
+The report file describing which ASVs are contained or not in the phylogenetic tree.

 .. class:: infomark page-header h2
--- a/tsv_to_biom.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/tsv_to_biom.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -1,5 +1,5 @@
 <tool id="FROGS_tsv_to_biom" name="FROGS TSV_to_BIOM" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
-    <description>Converts a TSV file in a BIOM file</description>
+    <description>Converts a TSV file in a BIOM file 1</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -11,7 +11,7 @@
 	    #if $multi_affi_file
 	        --input-multi-affi '$multi_affi_file'
 	    #end if
-	    #if $extract_fasta
+	    #if $extract_fasta == "yes"
 	        --output-fasta '$sequence_file'
 	    #end if
     </command>
@@ -20,12 +20,15 @@
         <param format="tabular,tsv" name="tsv_file" type="data" label="Abundance TSV File" help="Your FROGS abundance TSV file. Take care to keep original column names." />
         <param format="tabular,tsv" name="multi_affi_file" type="data" label="Multi_affiliation TSV File" help="TSV file describing multi_affiliation blast results." optional="true" />
         <!-- Parameters -->
-        <param name="extract_fasta" type="boolean" label="Extract seeds in FASTA file" help="If there is a 'seed_sequence' column in your TSV table, you can extract seed sequences in a separated FASTA file." />
+        <param name="extract_fasta" type="select" display="radio" label="Extract seeds in FASTA file" help="If there is a 'seed_sequence' column in your TSV table, you can extract seed sequences in a separated FASTA file.">
+            <option value="yes">Yes</option>
+            <option value="no" selected="true">No</option>
+        </param>
     </inputs>
     <outputs>
         <data format="biom1" name="biom_file" label="${tool.name}: abundance.biom" from_work_dir="abundance.biom" />
         <data format="fasta" name="sequence_file" label="${tool.name}: sequences.fasta" from_work_dir="seed.fasta">
-            <filter>extract_fasta</filter>
+            <filter>extract_fasta == "yes"</filter>
         </data>
     </outputs>
     <tests>
@@ -94,11 +97,11 @@

 If you modify your abundance TSV file

-    * -do not modify column names
-    * -do not remove columns
-    * -take care to choose a taxonomy available in your multi_affiliation TSV file
-    * -if you delete lines of the multi_affiliation file, take care to not remove a complete cluster whithout removing all "multi tags" in you abundance TSV file.
-    * -if you want to rename a taxon level (ex : genus "Ruminiclostridium 5;" to genus "Ruminiclostridium;"), do not forget to modify also your multi_affiliation TSV file.
+    * do not modify column names
+    * do not remove columns
+    * take care to choose a taxonomy available in your multi_affiliation TSV file
+    * if you delete lines of the multi_affiliation file, take care to not remove a complete cluster without removing all "multi tags" in your abundance TSV file.
+    * if you want to rename a taxon level (ex : genus "Ruminiclostridium 5;" to genus "Ruminiclostridium;"), do not forget to modify also your multi_affiliation TSV file.