Mercurial > repos > oinizan > frogs

--- a/frogsfunc_copynumbers.xml	Tue Jul 12 09:12:27 2022 +0000
+++ b/frogsfunc_copynumbers.xml	Mon Mar 06 22:44:14 2023 +0000
@@ -31,9 +31,9 @@
     <command >
 	   frogsfunc_copynumbers.py
             --input-biom $input_biom
-	   		--tree $input_tree
+	   		--input-tree $input_tree
 	   		#if $category.value == "16S"
-	   			--in-trait $function.fields.name
+	   			--input-functions $function.fields.name
 	   		#end if
 	   		--hsp-method $hsp_method
 	   		--output-marker $out_marker
@@ -42,12 +42,12 @@
 	   		--observed-marker-table $function.fields.path
 	   		--observed-trait-table $function.fields.traits
 	   		#end if
-            --html $summary_file
+            --summary $summary_file
 	</command>
 	<inputs>
 	    <!-- Input files -->
         <param argument="--input-biom" format="biom1" name="input_biom" type="data" label="Biom file" help="The abundance file to analyse i.e. FROGSFUNC_step1_placeseqs tool output file (frogsfunc_placeseqs.biom)." optional="false"/>
-	    <param argument="--tree" format="nhx" name="input_tree" type="data" label="Tree file" help="The file contains the tree information from FROGSFUNC_step1_placeseqs tool (frogsfunc_placeseqs_tree.nwk)." optional="false"/>
+	    <param argument="--input-tree" format="nhx" name="input_tree" type="data" label="Tree file" help="The file contains the tree information from FROGSFUNC_step1_placeseqs tool (frogsfunc_placeseqs_tree.nwk)." optional="false"/>
 	   	<!-- Parameters-->
 	    <param name="category" type="select" label="Taxonomic marker" help="Taxonomic marker of interest." multiple="false" display="radio">
             <options from_data_table="frogs_picrust2_marker_table">
--- a/frogsfunc_functions.xml	Tue Jul 12 09:12:27 2022 +0000
+++ b/frogsfunc_functions.xml	Mon Mar 06 22:44:14 2023 +0000
@@ -31,20 +31,22 @@
     <command >
        frogsfunc_functions.py
             --input-biom $input_biom
-            --function $function
-            --marker $marker
+            --input-fasta $input_fasta
+            --input-function $function
+            --input-marker $marker
             --max-nsti $max_nsti
-            --function-abund $function_abund
+            --output-function-abund $function_abund
             --seqtab $seqtab
             --weighted $weighted
             --excluded $excluded
-            --html $summary_file
+            --summary $summary_file
     </command>
     <inputs>
         <!-- Input files -->
         <param argument="--input-biom" format="biom1" name="input_biom" type="data" label="Biom file" help="The abundance file i.e. FROGSFUNC_step1_placeseqs tool output file (frogsfunc_placeseqs.biom)." optional="false"/>
-        <param argument='--function' format="tsv" type="data" label="Function file" help="Copy number table of functions present in the predicted genome for each OTU i.e. FROGSFUNC_step2_copynumbers tool output file (frogsfunc_copynumbers_predicted_functions.tsv)." optional="false"/>
-        <param argument='--marker' format="tsv" type="data" label="Marker file" help="Table of predicted marker copy number i.e. FROGSFUNC_step2_copynumbers output (frogsfunc_copynumbers_marker.tsv)." optional="false"/>
+       	<param argument="--input-fasta" format="fasta" name="input_fasta" type="data" label="Sequence file" help="The fasta file i.e. from FROGSFUNC_step1_placeseqs tool output file (frogsfunc_placeseqs.fasta)." optional="false"/>
+        <param argument='--input-function' name="function" format="tsv" type="data" label="Function file" help="Copy number table of functions present in the predicted genome for each OTU i.e. FROGSFUNC_step2_copynumbers tool output file (frogsfunc_copynumbers_predicted_functions.tsv)." optional="false"/> <!-- name is explicit because the command template reads $function -->
+        <param argument='--input-marker' name="marker" format="tsv" type="data" label="Marker file" help="Table of predicted marker copy number i.e. FROGSFUNC_step2_copynumbers output (frogsfunc_copynumbers_marker.tsv)." optional="false"/> <!-- name is explicit because the command template reads $marker -->

         <!-- Parameters-->
         <param argument="--max-nsti" name="max_nsti" type="float" label="NSTI cut-off" help="Any sequence with an NSTI above this threshold will be out. (default: 2)" value="2" min="0" optional="false" />
--- a/frogsfunc_pathways.xml	Tue Jul 12 09:12:27 2022 +0000
+++ b/frogsfunc_pathways.xml	Mon Mar 06 22:44:14 2023 +0000
@@ -41,7 +41,7 @@
              --no-regroup
             #end if
           #end if
-	    --html $summary_file
+	    --summary $summary_file
 	</command>
 	<inputs>
         <!-- Input files -->
--- a/frogsfunc_placeseqs.xml	Tue Jul 12 09:12:27 2022 +0000
+++ b/frogsfunc_placeseqs.xml	Mon Mar 06 22:44:14 2023 +0000
@@ -36,12 +36,12 @@
 	   		--input-biom $input_biom
 	   		--min-align $min_align
 	   		--placement-tool $placement_tool
-	   		--out-tree $out_tree
+	   		--output-tree $output_tree
 	   		--excluded $excluded
-	   		--insert-biom $insert_biom
-	   		--insert-fasta $insert_fasta
+	   		--output-biom $output_biom
+	   		--output-fasta $output_fasta
 	   		--closests-ref $closests_ref
-	   		--html $summary_file
+	   		--summary $summary_file

 	   		#if $marker.name != "16S"
 	   		--ref-dir ${marker.value}
@@ -69,11 +69,11 @@

 	<outputs>
 		<data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="report.html"/>
-		<data format="nhx" name="out_tree" label="${tool.name}: frogsfunc_placeseqs_tree.nwk" from_work_dir="frogsfunc_placeseqs_tree.nwk"/>
+		<data format="nhx" name="output_tree" label="${tool.name}: frogsfunc_placeseqs_tree.nwk" from_work_dir="frogsfunc_placeseqs_tree.nwk"/>
 		<data format="tsv" name="excluded" label="${tool.name}: frogsfunc_placeseqs_excluded.tsv" from_work_dir="frogsfunc_placeseqs_excluded.tsv"/>
-		<data format="fasta" name="insert_fasta" label="${tool.name}: frogsfunc_placeseqs.fasta" from_work_dir="frogsfunc_placeseqs.fasta"/>
+		<data format="fasta" name="output_fasta" label="${tool.name}: frogsfunc_placeseqs.fasta" from_work_dir="frogsfunc_placeseqs.fasta"/>
 		<data format="tsv" name="closests_ref" label="${tool.name}: frogsfunc_placeseqs_closests_ref_sequences.txt" from_work_dir="frogsfunc_placeseqs_closests_ref_sequences.txt"/>
-		<data format="biom1" name="insert_biom" label="${tool.name}: frogsfunc_placeseqs.biom" from_work_dir="frogsfunc_placeseqs.biom"/>
+		<data format="biom1" name="output_biom" label="${tool.name}: frogsfunc_placeseqs.biom" from_work_dir="frogsfunc_placeseqs.biom"/>
 	</outputs>

 	<tests>
@@ -84,10 +84,10 @@
 			<param name="ref_dir" value="databases/frogs_picrust2_db/prokaryotic/pro_ref/" />
 			<param name="min_align" value="0.8" />

-			<output name="insert_fasta" file="references/25-frogsfunc_placeseqs.fasta" compare="diff" lines_diff="0" />
-			<output name="insert_biom" file="references/25-frogsfunc_placeseqs.biom" compare="diff" lines_diff="0"/>
+			<output name="output_fasta" file="references/25-frogsfunc_placeseqs.fasta" compare="diff" lines_diff="0" />
+			<output name="output_biom" file="references/25-frogsfunc_placeseqs.biom" compare="diff" lines_diff="0"/>
 			<output name="excluded" file="references/25-frogsfunc_placeseqs_excluded.txt" compare="diff" lines_diff="0" />
-			<output name="out_tree" file="references/25-frogsfunc_placeseqs_tree.nwk" compare="diff" lines_diff="0" />
+			<output name="output_tree" file="references/25-frogsfunc_placeseqs_tree.nwk" compare="diff" lines_diff="0" />
 			<output name="closests_ref" file="references/25-frogsfunc_placeseqs_closests_ref_sequences.txt" compare="diff" lines_diff="0" />
 			<output name="summary_file" file="references/25-frogsfunc_placeseqs_report.html" compare="diff" lines_diff="0" />
 		</test>
--- a/frogsfunc_placeseqs.xml_back	Tue Jul 12 09:12:27 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,213 +0,0 @@
-<?xml version="1.0"?>
-<!--
-# Copyright (C) 2022 INRAE
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
--->
-<tool id="FROGSFUNC_step1_placeseqs" name="FROGSFUNC_step1_placeseqs" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
-	<description>Places the OTUs into a reference phylogenetic tree.</description>
-
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-
-    <expand macro="requirements_frogsfunc">
-        <requirement type="package" version="3.1.2">ete3</requirement>
-    </expand>
-
-    <stdio>
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
-    <command >
-	   	frogsfunc_placeseqs.py
-	   		--input-fasta $input_fasta
-	   		--input-biom $input_biom
-	   		--min-align $min_align
-	   		--placement-tool $placement_tool.fields.value
-	   		--out-tree $out_tree
-	   		--excluded $excluded
-	   		--insert-biom $insert_biom
-	   		--insert-fasta $insert_fasta
-	   		--closests-ref $closests_ref
-	   		--html $summary_file
-
-	   		#if $marker.name != "16S"
-	   		--ref-dir ${marker.value}
-	   		#end if
-	</command>
-	<inputs>
-    	<!-- Input files -->
-		<param argument="--input-fasta" format="fasta" name="input_fasta" type="data" label="Sequence file" help="The sequence file to analyse (format: fasta)." optional="false"/>
-		<param argument="--input-biom" format="biom1" name="input_biom" type="data" label="Biom file" help="The abundance file to analyse (format: biom). Taxonomic affiliations must be inside (FROGS Affiliation OTU step)." optional="false"/>
-
-    	<!-- Parameters -->
-        <param argument="--ref-dir" name="marker" type="select" label="Taxonomy marker" display="radio" multiple="false" help="Taxonomic marker of interest.">
-			<options from_data_table="frogs_picrust2_ref">
-	     		<column name="name" index="1"/>
-	        	<column name="value" index="2"/>
-                <filter type="unique_value" column="1"/>
-					<validator type="no_options" message="A built-in database is not available" />
-			</options>
-		</param>
-        <param argument="--placement-tool" name="placement_tool" type="select" label="Placement tool" help="Placement tool for insertion of sequences into the reference tree. SEPP is a low-memory alternative to EPA-ng for placing sequences." multiple="false" display="radio">
-			<options from_data_table="frogs_picrust2_ref">
-				<column name="name" index="3" />
-				<column name="value" index="3" />
-                <filter type="param_value" ref="marker" column="2" />
-					<validator type="no_options" message="A built-in database is not available" />
-			</options>
-		</param>
-		<param argument="--min-align" name="min_align" type="float" label="Minimum alignment length" help="Proportion of the total length of an input sequence that must align with reference sequences. All other will be out. (default: 0.80)" value="0.8" min="0" max="1" optional="false" />
-	</inputs>
-
-	<outputs>
-		<data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="report.html"/>
-		<data format="nhx" name="out_tree" label="${tool.name}: frogsfunc_placeseqs_tree.nwk" from_work_dir="frogsfunc_placeseqs_tree.nwk"/>
-		<data format="tsv" name="excluded" label="${tool.name}: frogsfunc_placeseqs_excluded.tsv" from_work_dir="frogsfunc_placeseqs_excluded.tsv"/>
-		<data format="fasta" name="insert_fasta" label="${tool.name}: frogsfunc_placeseqs.fasta" from_work_dir="frogsfunc_placeseqs.fasta"/>
-		<data format="tsv" name="closests_ref" label="${tool.name}: frogsfunc_placeseqs_closests_ref_sequences.txt" from_work_dir="frogsfunc_placeseqs_closests_ref_sequences.txt"/>
-		<data format="biom1" name="insert_biom" label="${tool.name}: frogsfunc_placeseqs.biom" from_work_dir="frogsfunc_placeseqs.biom"/>
-	</outputs>
-
-	<tests>
-		<test>
-			<param name="input_fasta" value="input/frogsfunc.fasta" />
-            <param name="input_biom" value="input/frogsfunc.biom" />
-            <param name="placement_tool" value="sepp"/>
-			<param name="ref_dir" value="databases/frogs_picrust2_db/prokaryotic/pro_ref/" />
-			<param name="min_align" value="0.8" />
-
-			<output name="insert_fasta" file="references/25-frogsfunc_placeseqs.fasta" compare="diff" lines_diff="0" />
-			<output name="insert_biom" file="references/25-frogsfunc_placeseqs.biom" compare="diff" lines_diff="0"/>
-			<output name="excluded" file="references/25-frogsfunc_placeseqs_excluded.txt" compare="diff" lines_diff="0" />
-			<output name="out_tree" file="references/25-frogsfunc_placeseqs_tree.nwk" compare="diff" lines_diff="0" />
-			<output name="closests_ref" file="references/25-frogsfunc_placeseqs_closests_ref_sequences.txt" compare="diff" lines_diff="0" />
-			<output name="summary_file" file="references/25-frogsfunc_placeseqs_report.html" compare="diff" lines_diff="0" />
-		</test>
-	</tests>
-
-     <help>
-
-@HELP_LOGO@
-
-.. class:: infomark page-header h2
-
-What it does
-
-FROGSFUNC_step1_placeseqs is the first step of PICRUSt2. It inserts your study sequences into a reference tree (`details &lt;https://github.com/picrust/picrust2/wiki/Sequence-placement&gt;`_). By default, this reference tree is based on 20,000 16S sequences from genomes in the `Integrated Microbial Genomes database &lt;https://img.jgi.doe.gov/&gt;`_. The script performs this step, which specifically:
-
-    - Aligns your study sequences with a multiple-sequence alignment of reference 16S, ITS or 18S sequences with `HMMER &lt;http://hmmer.org&gt;`_.
-
-    - Finds the most likely placements of your study sequences in the reference tree with `EPA_NG &lt;https://github.com/Pbdas/epa-ng#build-instructions&gt;`_ or `SEPP &lt;https://github.com/smirarab/sepp&gt;`_.
-
-    - Produces a treefile with the most likely placement for each sequence as the new tips with `GAPPA &lt;https://github.com/lczech/gappa&gt;`_.
-
-.. class:: infomark page-header h2
-
-Inputs/Outputs
-
-.. class:: h3
-
-Input
-
-
-**Sequence file** (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_):
-
- The OTUs fasta sequence file.
-
-**Biom file** (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_):
-
- The OTUs biom file. Taxonomic affiliations must be done before (bion file form FROGS Affiliation OTU tool).
-
-
-**Placement tool**
-
- EPA-NG or SEPP. SEPP is a low-memory alternative to EPA-ng for placing sequences.
-
-.. class:: warningmark
-
-So, if the tool crashes with EPA-ng, try again with SEPP.
-
-
-.. class:: h3
-
-Outputs
-
-**Newick file** (frogsfunc_placeseqs_tree.nwk):
-
- The phylogenetic tree output with insert sequences into the reference tree (format: newick). (format `nhx &lt;https://en.wikipedia.org/wiki/Newick_format&gt;`_).
-
-**Excluded sequence file** (frogsfunc_placeseqs_excluded.tsv):
-
- List of sequences not inserted in the PICRUSt2 reference tree. Sequences are excluded if the total length of the input sequence aligned against reference sequence is less than the specified " Minimum alignment length " threshold (0.8 per default). These sequences are excluded for the next steps.
-
-**Closests reference sequences file** (frogsfunc_placeseqs_closests_ref_sequences.txt):
-
- Information on the sequences from the PICRUST2 reference tree that are the closest neighbours of your studied sequences.
-
-**File of sequences placed in the phylogenetic tree** (frogsfunc_placeseqs.fasta)
-
- Sequence file of inserted sequences into PICRUST2 reference tree.
-
-**Abundance table** (frogsfunc_placeseqs.biom)
-
- OTU abundance biom file of inserted sequences into reference tree.
-
-
-**Report file** (report.html):
-
- The report file describes which OTUs are contained or not in the phylogenetic tree. Note that PICRUSt2 uses its own reference tree to affiliate OTUs/ASVs from reference sequences. The report file indicates for each OTU/ASV which is the closest PICRUSt2 reference sequence, and compares it to the original FROGS taxonomy. Clicking on the sequence ID gives you more information about it (`JGI database &lt;https://img.jgi.doe.gov/&gt;`_).
-
-.. image:: FROGS_frogsfunc_placeseqs_piecharts.png
-	:height: 290
-	:width: 676
-
-
-The pie charts describe the proportion of number of OTUs/ASVs excluded and the proportion of total sequences excluded for the following steps.
-OTUs/ASVs are excluded if the total length of the input sequence aligned against reference sequence is less than the specified " Minimum alignment length " threshold parameter.
-
-.. image:: FROGS_frogsfunc_placeseqs_table_JGI.png
-	:height: 580
-	:width: 1220
-
-* **Cluster** : OTU name.
-
-* **Nb sequences** : OTU sequence abundances.
-
-* **FROGS Taxonomy** : Taxonomic affiliation made by FROGS (FROGS Affiliation OTU).
-
-* **PICRUSt2 closest ID (JGI)** : Identifiant (JGI) of the closest reference sequence from the OTU inserted in the reference tree (see the explanatory illustration at the bottom of this page).
-
-* **PICRUSt2 closest reference name** : Genome Name / Sample Name.
-
-* **PICRUSt2 closest taxonomy** : Taxonomy (JGI) of the closest reference sequence from the OTU inserted in the reference tree under the following format: Kingdom;Phylum;Class;Order;Family;Genus;Species
-
-* **NSTI** : Nearest Sequenced Taxon Index (`NSTI &lt;https://www.nature.com/articles/nbt.2676&gt;`_) is the phylogenetic distance between the OTU and the nearest sequenced reference genome. This metric can be used to identify OTUs that are highly distant from all reference sequences (the predictions for these sequences are less reliable!). The higher the NSTI score, the less the affiliations are relevant. Any OTUs with a NSTI value higher than 2 are typically either from uncharacterized phyla or off-target sequences.
-
-* **NSTI confidence** : According to the NSTI score, we guide you in the confidence you can bring to the issue affiliation of PICRUSt2.
-
-* **Lowest same taxonomic rank between FROGS and PICRUSt2** : Comparison between FROGS and PICRUSt2 taxonomic affiliations. Lowest common taxonomic rank between FROGS and PICRUSt2 affiliations.
-
-* **Comment** : " identical taxonomy " if the FROGS and PICRUSt2 taxonomic affiliations are identical. " identical sequence " if the OTU sequence is strictly the same as the reference sequence.
-
-.. image:: FROGS_frogsfunc_placeseqs_closest_explained.png
-
-Closest reference sequence (from JGI database) from one cluster sequence.
-
-@HELP_CONTACT@
-
-    </help>
-    <expand macro="citations" />
-</tool>
--- a/macros.xml	Tue Jul 12 09:12:27 2022 +0000
+++ b/macros.xml	Mon Mar 06 22:44:14 2023 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">4.0.1</token>
+    <token name="@TOOL_VERSION@">4.1.0</token>
     <token name="@VERSION_SUFFIX@">1</token>

     <xml name="requirements">
@@ -39,10 +39,12 @@
             <validator type="regex">[A-Za-z ]+</validator>
         </param>
     </xml>
+
     <xml name="sanitizer_validator">
         <sanitizer invalid_char="">
             <valid initial="string.letters,string.digits">
                 <add value="+" />
+                <add value="*" />
                 <add value="-" />
                 <add value="=" />
                 <add value=" " />
@@ -51,7 +53,7 @@
                 <add value="." />
             </valid>
         </sanitizer>
-        <validator type="regex">[A-Za-z0-9 =-_+,.]+</validator>
+        <validator type="regex">[A-Za-z0-9+*= _,.-]+</validator> <!-- hyphen is last so it is a literal, not a range -->
     </xml>

     <token name="@HELP_LOGO@">
--- a/preprocess.xml	Tue Jul 12 09:12:27 2022 +0000
+++ b/preprocess.xml	Mon Mar 06 22:44:14 2023 +0000
@@ -1,23 +1,23 @@
 <tool id="FROGS_preprocess" name="FROGS Pre-process" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="GPL-2.0-only" profile="20.05">
     <description>merging, denoising and dereplication</description>
-    <macros>
+     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements">
         <requirement type="package" version="2.17.0">vsearch</requirement>
         <requirement type="package" version="1.2.11">flash</requirement>
         <requirement type="package" version="2.10">cutadapt</requirement>
-    </expand>
+    </expand>
     <command detect_errors="exit_code">
         preprocess.py '$sequencer_type.sequencer_selected'
         --output-dereplicated '$dereplicated_file' --output-count '$count_file' --summary '$summary_file'
         @CPUS@
         --min-amplicon-size $sequencer_type.min_amplicon_size
         --max-amplicon-size $sequencer_type.max_amplicon_size
-        #if $sequencer_type.sequencer_selected == "illumina"
-            #if $sequencer_type.sequencing_protocol.sequencing_protocol_selected == "standard"
-                --five-prim-primer '$sequencer_type.sequencing_protocol.five_prim_primer'
-                --three-prim-primer '$sequencer_type.sequencing_protocol.three_prim_primer'
+        #if $sequencer_type.sequencer_selected in ('illumina', 'longreads')
+            #if $sequencer_type.is_primer_in_seq.primer_choice
+                --five-prim-primer '$sequencer_type.is_primer_in_seq.five_prim_primer'
+                --three-prim-primer '$sequencer_type.is_primer_in_seq.three_prim_primer'
             #else
                 --without-primers
             #end if
@@ -37,7 +37,7 @@
                 --merge-software $sequencer_type.input_type.archive_type.merge_software_type.merge_software
                 #if $sequencer_type.input_type.archive_type.merge_software_type.merge_software == "flash"
                     --expected-amplicon-size $sequencer_type.input_type.archive_type.merge_software_type.expected_amplicon_size
-            #end if
+                #end if
                 #if $sequencer_type.input_type.archive_type.keep_unmerged
                     --keep-unmerged
                 #end if
@@ -72,14 +72,14 @@
                     #end if
                 #end if
             #else
+                --samples-names
+                #for $current in $sequencer_type.input_type.samples
+                    $sep'${current.name.strip()}'
+                #end for
                 --input-R1
                 #for $current in $sequencer_type.input_type.samples
                     $sep'${current.R1_file}'
                 #end for
-                --samples-names
-                #for $current in $sequencer_type.input_type.samples
-                    $sep'${current.name.strip()}'
-                #end for
             #end if
         #end if
     </command>
@@ -87,6 +87,7 @@
         <conditional name="sequencer_type">
             <param name="sequencer_selected" type="select" label="Sequencer" help="Select the sequencing technology used to produce the sequences.">
                 <option value="illumina" selected="true">Illumina</option>
+                <option value="longreads">Longreads (PACBIO, ONT)</option>
                 <option value="454">454</option>
             </param>
             <when value="illumina">
@@ -99,16 +100,15 @@
                     <when value="archive">
                         <param name="archive_file" type="data" format="tar,tgz" label="TAR archive file" help="The TAR file containing the sequences file(s) for each sample." />
                         <conditional name="archive_type">
-                            <param name="archive_type_selected" type="select" label="Are reads already merged ?" help="The archive contains 1 file by sample : R1 and R2 pair are already merged in one sequence.">
+                            <param name="archive_type_selected" type="select" label="Are reads already merged ?" help="Yes = The archive contains 1 file by sample : R1 and R2 pairs are already merged in one sequence.">
                                 <option value="paired" selected="true">No</option>
                                 <option value="already_merged">Yes</option>
                             </param>
-                            <!-- $sequencer_type.input_type.archive_type.archive_type_selected == "already_merged"  -->
                             <when value="paired">
                                 <!-- Reads size -->
                                 <param name="R1_size" type="integer" label="Reads 1 size" help="The maximum read1 size." value="" />
                                 <param name="R2_size" type="integer" label="Reads 2 size" help="The maximum read2 size." value="" />
-                                <param argument="--mismatch-rate" type="float" label="Mismatch rate" help="The maximum rate of mismatch in the overlap region" value="0.1" />
+                                <param argument="--mismatch-rate" type="float" label="Mismatch rate" help="The maximum rate of mismatches in the overlap region" value="0.1" />
                                 <conditional name="merge_software_type">
                                     <param argument="--merge-software" type="select" label="Merge software" help="Select the software to merge paired-end reads">
                                         <option value="vsearch" selected="true">Vsearch</option>
@@ -119,14 +119,14 @@
                                     </when>
                                     <when value="vsearch"></when>
                                 </conditional>
-                                <param argument="--keep-unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No : Unmerged reads will be excluded; Yes : unmerged reads will be artificially combined with 100 N. (default No)" />
+                                <param argument="--keep-unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No = Unmerged reads will be excluded; Yes = unmerged reads will be artificially combined with 100 N. (default No)" />
                             </when>
                             <when value="already_merged"></when>
                         </conditional>
                     </when>
                     <when value="files_by_samples">
                         <conditional name="files_by_samples_type">
-                            <param name="files_by_samples_type_selected" type="select" label="Are reads already merged ?" help="The inputs contain 1 file by sample : R1 and R2 pair are already merged in one sequence.">
+                            <param name="files_by_samples_type_selected" type="select" label="Are reads already merged ?" help="Yes = The inputs contain 1 file by sample : R1 and R2 pairs are already merged in one sequence.">
                                 <option value="paired" selected="true">No</option>
                                 <option value="already_merged">Yes</option>
                             </param>
@@ -153,7 +153,7 @@
                                     </when>
                                     <when value="vsearch"></when>
                                 </conditional>
-                                <param argument="--keep-unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No : Unmerged reads will be excluded; Yes : unmerged reads will be artificially combined with 100 N. (default No)" />
+                                <param argument="--keep-unmerged" type="boolean" label="Would you like to keep unmerged reads?" help="No = Unmerged reads will be excluded; Yes = unmerged reads will be artificially combined with 100 N. (default No)" />
                             </when>
                             <when value="already_merged">
                                 <repeat name="samples" title="Samples" min="1">
@@ -167,29 +167,71 @@
                     </when>
                 </conditional>
                 <!-- Amplicons -->
-                <param argument="--min-amplicon-size" type="integer" value="" label="Minimum amplicon size" help="The minimum size for the amplicons (with primers)"/>
-                <param argument="--max-amplicon-size" type="integer" value="" label="Maximum amplicon size" help="The maximum size for the amplicons (with primers)"/>
+                <param argument="--min-amplicon-size" type="integer" value="" label="Minimum amplicon size" help="The minimum size of the amplicons (with primers)"/>
+                <param argument="--max-amplicon-size" type="integer" value="" label="Maximum amplicon size" help="The maximum size of the amplicons (with primers)"/>
                 <!-- Primers -->
-                <conditional name="sequencing_protocol">
-                    <param name="sequencing_protocol_selected" type="select" label="Sequencing protocol" help="The protocol used for sequencing step: standard or custom with PCR primers as sequencing primers.">
-                        <option value="standard" selected="true">Illumina standard</option>
-                        <option value="without_primers">Custom protocol (Kozich et al. 2013)</option>
-                    </param>
-                    <when value="standard">
-                        <param argument="--five-prim-primer" type="text" label="5' primer" help="The 5' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters' help section">
+                <conditional name="is_primer_in_seq">
+                    <param name="primer_choice" type="boolean" checked="true" label="Do the sequences have PCR primers?" help=""/>
+                    <when value="true">
+                        <param argument="--five-prim-primer" type="text"  label="5' primer" help="The 5' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
                             <sanitizer invalid_char="">
                                 <valid initial="string.letters"/>
                             </sanitizer>
                             <validator type="regex">[A-Za-z]+</validator>
                         </param>
-                        <param argument="--three-prim-primer" type="text" label="3' primer" help="The 3' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters' help section">
+                        <param argument="--three-prim-primer" type="text"  label="3' primer" help="The 3' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
                             <sanitizer invalid_char="">
                                 <valid initial="string.letters"/>
                             </sanitizer>
                             <validator type="regex">[A-Za-z]+</validator>
                         </param>
                     </when>
-                    <when value="without_primers"></when>
+                    <when value="false"></when>
+                </conditional>
+            </when>
+
+            <when value="longreads">
+                <!-- Samples -->
+                <conditional name="input_type">
+                    <param name="input_type_selected" type="select" label="Input type" help="Samples files can be provided in single archive or with one file by sample.">
+                        <option value="files_by_samples">One file by sample</option>
+                        <option value="archive" selected="true">TAR Archive</option>
+                    </param>
+                    <when value="archive">
+                        <param name="archive_file" type="data" format="tar,tgz" label="TAR archive file" help="The TAR file containing the sequences file for each sample." />
+                    </when>
+                    <when value="files_by_samples">
+                        <repeat name="samples" title="Samples" min="1">
+                            <param name="name" type="text" label="Name" help="The sample name.">
+                                <expand macro="sanitizer_validator"/>
+                            </param>
+                            <param format="fastq" name="R1_file" type="data" label="Sequence file" help="FASTQ file of sample." />
+                        </repeat>
+                    </when>
+                </conditional>
+
+                <!-- Amplicons -->
+                <param argument="--min-amplicon-size" type="integer" value="" label="Minimum amplicon size" help="The minimum size for the amplicons (with primers)"/>
+                <param argument="--max-amplicon-size" type="integer" value="" label="Maximum amplicon size" help="The maximum size for the amplicons (with primers)"/>
+
+                <!-- Primers -->
+                <conditional name="is_primer_in_seq">
+                    <param name="primer_choice" type="boolean" checked="true" label="Do the sequences have PCR primers?" help=""/>
+                    <when value="true">
+                        <param argument="--five-prim-primer" type="text"  label="5' primer" help="The 5' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
+                            <sanitizer invalid_char="">
+                                <valid initial="string.letters"/>
+                            </sanitizer>
+                            <validator type="regex">[A-Za-z]+</validator>
+                        </param>
+                        <param argument="--three-prim-primer" type="text"  label="3' primer" help="The 3' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
+                            <sanitizer invalid_char="">
+                                <valid initial="string.letters"/>
+                            </sanitizer>
+                            <validator type="regex">[A-Za-z]+</validator>
+                        </param>
+                    </when>
+                    <when value="false"></when>
                 </conditional>
             </when>

@@ -216,13 +258,13 @@
                 <param argument="--min-amplicon-size" type="integer" value="" label="Minimum amplicon size" help="The minimum size for the amplicons (with primers)"/>
                 <param argument="--max_amplicon-size" type="integer" value="" label="Maximum amplicon size" help="The maximum size for the amplicons (with primers)"/>
                 <!-- Primers -->
-                <param argument="--five-prim-primer" type="text"  label="5' primer" help="The 5' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters' help section">
+                <param argument="--five-prim-primer" type="text"  label="5' primer" help="The 5' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
                     <sanitizer invalid_char="">
                         <valid initial="string.letters"/>
                     </sanitizer>
                     <validator type="regex">[A-Za-z]+</validator>
                 </param>
-                <param argument="--three-prim-primer" type="text"  label="3' primer" help="The 3' primer sequence (wildcards are accepted). The orientation is detailed below in 'Primers parameters' help section">
+                <param argument="--three-prim-primer" type="text"  label="3' primer" help="The 3' primer sequence (wildcards are accepted). This primer must be written in 5' to 3' orientation (see details in 'Primers parameters' help section)">
                     <sanitizer invalid_char="">
                         <valid initial="string.letters"/>
                     </sanitizer>
@@ -301,62 +343,67 @@

 @HELP_LOGO@

-.. class:: infomark page-header h2
+.. class:: h2

 What it does

 FROGS Pre-process filters and dereplicates amplicons for use in diversity analysis.

-.. class:: infomark page-header h2
-
-Inputs/Outputs
-
-.. class:: h3
+.. class:: h2

 Inputs

-Sample files added one after another or provide in an archive file (tar or tar.gz).
-
-.. container:: row
-
- .. container:: col-md-6
+Sequencer (methods used to sequence data):
+	- short reads : Illumina MiSeq, HiSeq (paired-ends or single-ends)
+	- long reads : PACBIO or Oxford Nanopore Technology (single-ends)
+	- short reads : 454 (single-ends)

-  **Illumina inputs**
+Input file to submit and *"Are reads already merged ?"* parameter:
+    - a .tar archive (option Archive TAR) containing one file *_R1* and one file *_R2* per sample if the sequences are paired and not merged. ex: samplesA-B-C-D.tar(.gz)
+    - or a .tar archive (option Archive TAR) containing one file per sample (i) if the sequences are paired-end and already merged or (ii) if the sequences are single-end. ex: samplesA-B.tar(.gz) in this case reply *Yes* at *"Are reads already merged ?"*.
+    - or one file by sample (option One file by sample). ex: sampleA_R1.fastq(.gz) + sampleA_R2.fastq(.gz) + sampleB_R1.fastq(.gz) + sampleB_R2.fastq(.gz) if you have 2 samples A and B and sequences are paired and not merged
+    - or one file by sample (option One file by sample). ex: sampleA.fastq(.gz) + sampleB.fastq(.gz) if sequences are paired-end and already merged. In this case reply *Yes* at *"Are reads already merged ?"*.
+
+Remark:
+	- The names of the R1 and R2 files must end with *_R1* and *_R2*. The part upstream of this tag (_R1 or _R2) will be considered as the sample name. ex: sampleA_R1.fastq + sampleA_R2.fastq, the kept name will be sampleA
+	- sample files (alone or inside an archive) must be in `FASTQ &lt;https://en.wikipedia.org/wiki/FASTQ_format&gt;`_ format or fastq.gz.

-   :Usage: For samples sequenced in paired-end. In case of overlapping pair, the maximum amplicon length (including primers) must be inferior or equal to the length of the R1 plus R2 length minus 10. R1 and R2 are merged by the common region with a minimum length of 10.
-   :Files: One R1 and R2 by sample (format `FASTQ &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_)
-   :Example: splA_R1.fastq.gz,  splA_R2.fastq.gz,  splB_R1.fastq.gz,  splB_R2.fastq.gz
+
+.. class:: h4

-  OR
+For paired-end reads:
+
+**read size:** The maximum size of read R1 and of read R2. It is common to find read sizes of 150, 250 or 300 for Illumina sequencers.

-   :Usage: For samples sequenced in single-ends or when R1 and R2 reads are already merged.
-   :Files: One sequence file by sample (format `FASTQ &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_).
-   :Example: splA.fastq.gz,  splB.fastq.gz
+**Mismatch rate:** The allowed maximum rate of mismatches during the merging between overlap sections of R1 and R2 reads. By default, the mismatch rate is 10%.
+
+**Merge software:** For read merging it is possible to choose between 2 software `VSEARCH &lt;https://github.com/torognes/vsearch/&gt;`_ (by default) or `FLASH &lt;http://ccb.jhu.edu/software/FLASH/&gt;`_.

- .. container:: col-md-6
+**Would you like to keep unmerged reads?:** In some cases, it is necessary to keep unmergeable reads (ITS, non-mergeable reads *i.e.* V1V4 of 16S rRNA). *No* (by default) = Unmerged reads will be excluded; *Yes* = unmerged reads will be artificially combined and kept for following process.
+
+
+.. class:: h4
+
+For paired-end and single-ends reads:

-  **454 inputs**
+**Minimum amplicon length:** The minimum size of the amplicons after read (R1, R2) pair merging.
+
+**Maximum amplicon length:** The maximum size of the amplicons after read (R1, R2) pair merging. In case of overlapping pairs, the maximum amplicon length (including primers) must be inferior or equal to the length of the R1 plus R2 length minus 10. R1 and R2 are merged by the common region with a minimum length of 10.

-   :Files: One sequence file by sample (format `FASTQ &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_)
-   :Example: splA.fastq.gz,  splB.fastq.gz
+Do the sequences have PCR primers?:
+	- Yes (By default), after processing, the sequences will be returned without the PCR primers.
+	- No, the sequences do not contain PCR primer (`Kozich et al. 2013 &lt;http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3753973/&gt;`_)

-Remark: In an archive, if you use R1 and R2 files, their names must end with *_R1* and *_R2*. The upstream part from this tag (_R1 and _R2) will be consider as sample name.

-.. class:: h3
+.. class:: h2

 Outputs

-**Sequence file** (dereplicated.fasta):
-
- Only one file with all samples sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_). These sequences are dereplicated: strictly identical sequences are represented only once, the initial count by sample is kept in count file (see bellow) and the total count is added in the sequence header. A "FROGS_combined" suffix will be added to un-merged pair sequences if you want to keep them.
-
-**Count file** (count.tsv):
+**Sequence file** (dereplicated.fasta): Only one file with all samples sequences (format `FASTA &lt;https://en.wikipedia.org/wiki/FASTA_format&gt;`_). These sequences are dereplicated: strictly identical sequences are represented only once, the initial count by sample is kept in the count file (see below) and the total count is added in the sequence header. A "FROGS_combined" suffix will be added to unmerged paired sequences if you want to keep them.

- This file contains the count of all unique sequences in each sample (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).
+**Count file** (count.tsv): This file contains the count of all unique sequences in each sample (format `TSV &lt;https://en.wikipedia.org/wiki/Tab-separated_values&gt;`_).

-**Report file** (report.html):
-
- This file reports the number of remaining sequences after each filter (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_). Depending of the tool configuration there will be more or less filtering steps so more or less bars in the barplot.
+**Report file** (report.html): This file reports the number of remaining sequences after each filter (format `HTML &lt;https://en.wikipedia.org/wiki/HTML&gt;`_). Depending on the tool configuration, there will be more or fewer filtering steps and therefore more or fewer bars in the barplot.

  .. image:: FROGS_preprocess_summary_v3.png
      :height: 850
@@ -368,44 +415,64 @@
      :height: 379
      :width: 364

-.. class:: infomark page-header h2
+
+.. class:: h2

 How it works ?

 .. csv-table::
-   :header: "Steps", "Illumina", "454"
-   :widths: 5, 150, 150
-   :class: table table-striped
+   :header: "Steps", "Illumina"
+   :widths: 10, 150
+   :class: table table-hover
+
+   "1", "For unmerged data: Merges R1 and R2 with a maximum of M% mismatch in the overlapped region (`VSEARCH &lt;https://github.com/torognes/vsearch/&gt;`_ or `FLASH &lt;http://ccb.jhu.edu/software/FLASH/&gt;`_ or optionally `PEAR &lt;https://sco.h-its.org/exelixis/web/software/pear/&gt;`_) with a minimum of 10 bp in the overlap region. Resulting unmerged reads may optionally be artificially combined by adding 100 N between the reads"
+   "2", "If reads still contain the PCR primers after sequencing: the process removes sequences where the two primers are not present and removes primers in the kept sequences (`cutadapt &lt;http://cutadapt.readthedocs.org/en/latest/guide.html&gt;`_). The primer search accepts 10% of differences."
+   "3", "Process filters sequences with ambiguous nucleotides and for *merged* sequences filters on their length, which must range between 'Minimum amplicon size minus primer length' and 'Maximum amplicon size minus primer length'"
+   "4", "Dereplicates sequences"
+
+.. csv-table::
+   :header: "Steps", "Longreads"
+   :widths: 10, 150
+   :class: table table-hover

-   "1", "For un-merged data: Merges R1 and R2 with a maximum of M% mismatch in the overlaped region(`VSEARCH &lt;https://github.com/torognes/vsearch/&gt;`_ or `FLASH &lt;http://ccb.jhu.edu/software/FLASH/&gt;`_ or optionnaly `PEAR &lt;https://sco.h-its.org/exelixis/web/software/pear/&gt;`_) with a minimum of 10 bp in the overlap region. Resulting un-merged reads may optionnaly be artificially combined by adding 100 N between the reads", "/"
-   "2", "If sequencing protocol is the illumina standard protocol: Removes sequences where the two primers are not present and removes primers in the remaining sequence (`cutadapt &lt;http://cutadapt.readthedocs.org/en/latest/guide.html&gt;`_). The primer search accepts 10% of differences", "Removes sequences where the two primers are not present, removes primers sequence from amplicon sequence and reverse complement the sequences on strand -  (`cutadapt &lt;http://cutadapt.readthedocs.org/en/latest/guide.html&gt;`_). The primer search accepts 10% of differences"
-   "3", "Filters sequences with ambiguous nucleotides and for merged sequences filters on their length which must be range between 'Minimum amplicon size - primer length' and 'Maximum amplicon size - primer length'", "Removes sequences with at least one homopolymer with more than seven nucleotides and with a distance of less than or equal to 10 nucleotides between two poor quality positions, i.e. with a Phred quality score lesser than 10"
-   "4", "Dereplicates sequences", "Dereplicates sequences"
+   "1", "Non merging process, longreads from PACBIO or ONT are single-end reads"
+   "2", "If reads still contain the PCR primers after sequencing: the process searches for the 5' primer on the reads, then, for all reads in which the 5' primer was not found, the process (`cutadapt &lt;http://cutadapt.readthedocs.org/en/latest/guide.html&gt;`_) reverse-complements the reads and searches again for the 5' primer (in the dereplicated.fasta output, sequences have an *rc* tag in the header when they have been reverse-complemented by cutadapt). Remark: after this step all reads are in the same orientation (5' -> 3'). The last step searches for the 3' primer on all these reads. The process removes reads if the 5' primer or the 3' primer is not found at the end of the process. When primers are found, reads are trimmed. The primer search accepts 10% of differences"
+   "3", "Process filters sequences with ambiguous nucleotides and on their length, which must range between 'Minimum amplicon size minus primer length' and 'Maximum amplicon size minus primer length'"
+   "4", "Dereplicates sequences"

+.. csv-table::
+   :header: "Steps", "454"
+   :widths: 10, 150
+   :class: table table-hover

-.. class:: infomark page-header h2
+   "1", "Non merging process, 454 reads are single-end reads"
+   "2", "Removes sequences where the two primers are not present, removes primers sequence from amplicon sequence and reverse complement the sequences on strand -  (`cutadapt &lt;http://cutadapt.readthedocs.org/en/latest/guide.html&gt;`_). The primer search accepts 10% of differences"
+   "3", "Removes sequences with at least one homopolymer with more than seven nucleotides and with a distance of less than or equal to 10 nucleotides between two poor quality positions, *i.e.* with a Phred quality score lesser than 10"
+   "4", "Dereplicates sequences"
+
+.. class:: h2

 Advices/details on parameters

-.. class:: h3
+.. class:: h4

-Keeping or not un-merged paired reads
+Keeping or not unmerged paired-end reads

 .. class:: warningmark

 This option is usefull when and only when, **targeted amplicon is longer than the sequencing technology** can provide (ITS amplicons, V1-V4 region of 16S for example). In other case, carefully, you will only keep noise in your analysis.

-.. class:: h3
+.. class:: h4

 What is the difference between overlapped sequences and combined sequences?

-- **Case of a sequencing of overlapping sequences: case of 16S V3-V4 amplicon MiSeq sequencing**
+**Case of a sequencing of overlapping sequences: case of 16S V3-V4 amplicon MiSeq sequencing**

 .. image:: FROGS_preprocess_overlapped_sequence.png
      :height: 261
      :width: 531

-- **Case of a sequencing of non-overlapping sequences: case of ITS1 amplicon MiSeq sequencing**
+**Case of a sequencing of non-overlapping sequences: case of ITS1 amplicon MiSeq sequencing**

 .. image:: FROGS_preprocess_combined_sequence1.png
      :height: 279
@@ -427,13 +494,13 @@
      :height: 357
      :width: 798

-.. class:: h3
+.. class:: h4

 Primers parameters

-The (`Kozich et al. 2013 &lt;http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3753973/&gt;`_ ) protocol uses custom sequencing primers that are also the PCR primers. In this case, the reads do not contain the PCR primers.
+The `Kozich et al. 2013 &lt;http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3753973/&gt;`_ protocol uses custom sequencing primers that are also the PCR primers. In this case, the reads do not contain the PCR primers.

-In case of Illumina standard protocol, the primers must be provided in 5' to 3' orientation.
+In case of standard protocol, the primers must be provided in 5' to 3' orientation.

 .. role:: alert-info

@@ -445,15 +512,21 @@

  Value for parameter 3' primer: ATTTCAG

-.. class:: h3
+
+.. class:: h4
+
+What happens if the 'merged' filter drastically reduces the number of sequences?
+
+After merging step with VSEARCH, PEAR or FLASH, if you observe a loss of more than 20% in all samples, this can highlight a quality problem (see `FastQC &lt;http://www.bioinformatics.babraham.ac.uk/projects/fastqc/&gt;`_).
+
+If the overlap between R1 and R2 is greater than 50 nucleotides and the quality of the end of the sequences is poor (see `FastQC &lt;http://www.bioinformatics.babraham.ac.uk/projects/fastqc/&gt;`_) you can try to cut the end of your sequences and relaunch the preprocess tool. You can also raise the mismatch percentage in the overlapped region, but not too much!
+
+
+.. class:: h4

 FLASH : Amplicon size parameters

- .. class:: infomark
-
- We now recommend to use PEAR if availbale (only for accademic user) or Vsearch. PEAR is available only in command line.
-
- The two following images show two examples of perfect values for sizes parameters.
+The two following images show two examples of perfect values for sizes parameters.

  .. image:: FROGS_preprocess_ampliconSize_unimodal_v3.png
     :height: 415
@@ -463,15 +536,10 @@
     :height: 415
     :width: 676

- Don't worry the "Expected amplicon size" does not need to be very accurate, and only necessary for sequences merging with FLASH.
-
-.. class:: h3
+Don't worry, the "Expected amplicon size" does not need to be very accurate, and is only necessary for sequence merging with FLASH.

-If the filter 'merged' reduce drasticaly the number of sequences:
+**Remark:** We recommend using PEAR if available (only for `academic users &lt;https://www.h-its.org/software/pear-paired-end-read-merger/&gt;`_) or VSEARCH (the default in the Galaxy interface). PEAR is only available on the command line.

- In un-merged Illumina data, and targeted amplicon size in the range of R1+R2-10, the reduction of dataset by the merged filter is classicaly inferior than 20%. A loss of more than 20% in all samples can highlight a quality problem.
-
- If the overlap between R1 and R2 is superior to 50 nucleotides and the quality of the end of the sequences is poor (see `FastQC &lt;http://www.bioinformatics.babraham.ac.uk/projects/fastqc/&gt;`_) you can try to cut the end of your sequences and relaunch the preprocess tool. You can either raise the mismatch percent in the overlapped region, but not too much!


 @HELP_CONTACT@
--- a/tool-data/frogs_picrust2_default_dir.loc.sample_back	Tue Jul 12 09:12:27 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-# Copyright (C) 2022 INRA
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-#
-#This is a sample file that enables tools FROGS_frogsfunc_placeseqs to place
-#studies sequences (i.e. OTUs) into a reference tree.
-#
-#PICRUSt2 files are present in the arborescence of PICRUSt2. Please consult
-#PICRUSt2 documentation to find out where they are located in your environment.
-#
-#If you are not comfortable with picrust2 command lines, just create .loc files
-#with EXAMPLE FOR TEST files remembering to replace <Galaxy_dir> with the real path.
-#
-#The indicated path is the one if you do not change the default galaxy config of conda directory.
-#
-#<identifiant>	<marker_gene>	<path_to_default_dir>
-#
-#default dir must contain these files:
-#
-#-rw-rw-r-- 1 vdarbot vdarbot 31922081 avril  1  2021 pro_ref.fna
-#-rwxrwxr-x 1 vdarbot vdarbot   272826 avril  1  2021 pro_ref.hmm
-#-rwxrwxr-x 1 vdarbot vdarbot      116 avril  1  2021 pro_ref.model
-#-rwxrwxr-x 1 vdarbot vdarbot     1707 avril  1  2021 pro_ref.raxml_info
-#-rwxrwxr-x 1 vdarbot vdarbot   600048 avril  1  2021 pro_ref.tre
-#
-# EXAMPLE FOR TEST :
-#picrust2_default_dir_16S	16S	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/prokaryotic/pro_ref/
-#picrust2_default_dir_ITS	ITS	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_ITS/
-#picrust2_default_dir_18S	18S	<Galaxy_dir>/database/dependencies/_conda/envs/<PICRUSt2_env>/lib/python3.6/site-packages/picrust2/default_files/fungi/fungi_18S/