Mercurial > repos > oinizan > frogs

diff frogsfunc_functions.xml @ 21:74a9b83110a7 draft
planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/ commit f481f7af0d70c862c493d55f386b375e7f968c5c-dirty
author: oinizan
date: Thu, 30 Mar 2023 06:09:01 +0000
parents: ca1e9adbde51
children: 57824202c333
--- a/frogsfunc_functions.xml	Fri Mar 10 14:03:08 2023 +0000
+++ b/frogsfunc_functions.xml	Thu Mar 30 06:09:01 2023 +0000
@@ -15,14 +15,15 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 -->
-<tool id="FROGSFUNC_step3_functions" name="FROGSFUNC_step3_functions" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="FROGSFUNC_step3_functions" name="FROGSFUNC_2_functions" version= "@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
     <description>Calculates functions abundances in each sample.</description>
 
-    <macros>
+  <macros>
         <import>macros.xml</import>
-    </macros>
+  </macros>
 
-    <expand macro="requirements_frogsfunc" />
+  <expand macro="requirements_frogsfunc" />
+
 
     <stdio>
         <exit_code range="1:" />
@@ -30,33 +31,112 @@
     </stdio>
     <command >
        frogsfunc_functions.py
+            @CPUS@
             --input-biom $input_biom
             --input-fasta $input_fasta
-            --input-function $function
-            --input-marker $marker
+            --input-tree $input_tree
+            --input-marker $input_marker
+            --marker-type $category.value
+            #if $category.value == "16S"
+                --functions $functions
+            #end if
+            #if $category.value != "16S"
+                --input-function-table $functions.fields.traits
+            #end if 
             --max-nsti $max_nsti
-            --output-function-abund $function_abund
-            --seqtab $seqtab
-            --weighted $weighted
-            --excluded $excluded
+            --min-blast-ident $min_blast_ident
+            --min-blast-cov $min_blast_cov
+	   		--hsp-method $hsp_method
+            --output-biom $output_biom
+            --output-fasta $output_fasta
+            --output-function-abund "frogsfunc_functions_unstrat.tsv"
+            --output-otu-norm $output_otu_norm
+            --output-weighted $output_weighted
+            --output-excluded $output_excluded
             --summary $summary_file
+
     </command> 
     <inputs>
         <!-- Input files -->
-        <param argument="--input-biom" format="biom1" name="input_biom" type="data" label="Biom file" help="The abundance file i.e. FROGSFUNC_step1_placeseqs tool output file (frogsfunc_placeseqs.biom)." optional="false"/>
-       	<param argument="--input-fasta" format="fasta" name="input_fasta" type="data" label="Sequence file" help="The fasta file i.e. from FROGSFUNC_step1_placeseqs tool output file (frogsfunc_placeseqs.fasta)." optional="false"/>
-        <param argument='--input-function' format="tsv" type="data" label="Function file" help="Copy number table of functions present in the predicted genome for each OTU i.e. FROGSFUNC_step2_copynumbers tool output file (frogsfunc_copynumbers_predicted_functions.tsv)." optional="false"/>
-        <param argument='--input-marker' format="tsv" type="data" label="Marker file" help="Table of predicted marker copy number i.e. FROGSFUNC_step2_copynumbers output (frogsfunc_copynumbers_marker.tsv)." optional="false"/>
+        <param argument="--input-biom" format="biom1" type="data" label="Biom file" help="The abundance file i.e. FROGSFUNC_1_placeseqs_copynumber tool output file (frogsfunc_placeseqs.biom)." optional="false"/>
+       	<param argument="--input-fasta" format="fasta" type="data" label="Sequence file" help="The fasta file i.e. from FROGSFUNC_1_placeseqs_copynumber tool output file (frogsfunc_placeseqs.fasta)." optional="false"/>
+        <param argument="--input-tree" format="nhx" type="data" label="Tree file" help="The file contains the tree information from FROGSFUNC_1_placeseqs_copynumber tool (frogsfunc_placeseqs_tree.nwk)." optional="false"/>
+        <param argument="--input-marker" format="tsv" type="data" label="Marker file" help="Table of predicted marker copy number i.e. FROGSFUNC_1_placeseqs_copynumber output (frogsfunc_marker.tsv)." optional="false"/>
         
         <!-- Parameters-->
-        <param argument="--max-nsti" name="max_nsti" type="float" label="NSTI cut-off" help="Any sequence with an NSTI above this threshold will be out. (default: 2)" value="2" min="0" optional="false" /> 
+	    <param name="category" type="select" label="Taxonomic marker" help="Taxonomic marker of interest." multiple="false" display="radio">
+            <options from_data_table="frogs_picrust2_marker_table">
+                <column name='name' index='0' />
+                <column name='value' index='0' />
+                <filter type="unique_value" column='0'/>
+                    <validator type="no_options" message="A built-in database is not available" />
+            </options>
+		</param>
+		<param argument="--functions" type="select" label="Target function database" multiple="true" optional="false" help=" 16S : at least 'EC' or/and 'KO' should be chosen (EC for Metacyc pathway analysis or/and KO for KEGG pathway analysis) - others values are optionnal. ITS and 18S : 'EC' only available." >
+			<options from_data_table="frogs_picrust2_marker_table">
+				<column name='name' index='1' />
+				<column name='value' index='1' />
+				<column name='path' index='2' />
+				<column name='traits' index='3' />
+                <filter type="param_value" ref="category" column="0" />   
+ 		<validator type="expression" message="'EC' is the default database used by PICRUSt2. 'EC' or 'KO' must be at least selected. Other tables are optionnal">"EC" in value or "KO" in value</validator>               
+            </options>
+        </param>
+        <param argument="--max-nsti" type="float" label="NSTI cut-off" help="Any sequence with an NSTI above this threshold will be out. (default: 2)" value="2" min="0" optional="false" />
+        <param argument="--min-blast-ident" type="float" label="Identity alignment cut-off" help="Percentage identity of the alignment between the input sequence and the PICRUSt2 reference sequence. Below this threshold, all sequences will be discarded. (default: None)" value="0" min="0" max="1" optional="true" />
+        <param argument="--min-blast-cov" type="float" label="Coverage alignment cut-off" help="Coverage identity of the alignment between the input sequence and the PICRUSt2 reference sequence. Below this threshold, all sequences will be discarded.  (default: None)" value="0" min="0" max="1" optional="true" />
+		<param argument="--hsp-method" type="select" label="HSP method" help="Hidden-state prediction method to use: maximum parsimony (mp), empirical probabilities (emp_prob), continuous traits prediction using subtree averaging (subtree_average), continuous traits prediction with phylogentic independent contrast (pic), continuous traits reconstruction using squared-change parsimony (scp) (default: mp)." multiple="false" display="radio">
+            <option value="mp">mp</option>
+            <option value="emp_prob">emp_prob</option>
+            <option value="pic">pic</option>
+            <option value="scp">scp</option>
+            <option value="subtree_average">subtree_average</option>
+		</param>
     </inputs>
     <outputs>
         <data format="html" name="summary_file" label="${tool.name}: report.html" from_work_dir="report.html"/>
-        <data format="tsv" name="seqtab" label="${tool.name}: frogsfunc_functions_marker_norm.tsv" from_work_dir="frogsfunc_functions_marker_norm.tsv.tsv"/> 
-        <data format="tsv" name="weighted" label="${tool.name}: frogsfunc_functions_weighted_nsti.tsv" from_work_dir="frogsfunc_functions_weighted_nsti.tsv"/> 
-        <data format="tsv" name="excluded" label="${tool.name}: frogsfunc_functions_excluded.tsv" from_work_dir="frogsfunc_functions_excluded.tsv"/>
-        <data format="tsv" name="function_abund" label="${tool.name}:   frogsfunc_functions_unstrat.tsv" from_work_dir=" frogsfunc_functions_unstrat.tsv"/> 
+		<data format="biom1" name="output_biom" label="${tool.name}: frogsfunc_functions.biom" from_work_dir="frogsfunc_functions.biom"/>
+		<data format="fasta" name="output_fasta" label="${tool.name}: frogsfunc_functions.fasta" from_work_dir="frogsfunc_functions.fasta"/>
+
+        <data format="tsv" name="output_otu_norm" label="${tool.name}: frogsfunc_functions_marker_norm.tsv" from_work_dir="frogsfunc_functions_marker_norm.tsv.tsv"/> 
+        <data format="tsv" name="output_weighted" label="${tool.name}: frogsfunc_functions_weighted_nsti.tsv" from_work_dir="frogsfunc_functions_weighted_nsti.tsv"/> 
+        <data format="tsv" name="output_excluded" label="${tool.name}: frogsfunc_functions_excluded.tsv" from_work_dir="frogsfunc_functions_excluded.tsv"/>
+        <data format="tsv" name="output_copy_ec_abund" label="${tool.name}: EC_copynumbers_predicted.tsv" from_work_dir="EC_copynumbers_predicted.tsv">
+            <filter>"EC" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_ko_abund" label="${tool.name}: KO_copynumbers_predicted.tsv" from_work_dir="KO_copynumbers_predicted.tsv">
+            <filter>"KO" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_cog_abund" label="${tool.name}: COG_copynumbers_predicted.tsv" from_work_dir="COG_copynumbers_predicted.tsv">
+            <filter>"COG" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_pfam_abund" label="${tool.name}: PFAM_copynumbers_predicted.tsv" from_work_dir="PFAM_copynumbers_predicted.tsv">
+            <filter>"PFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_tigrfam_abund" label="${tool.name}: TIGRFAM_copynumbers_predicted.tsv" from_work_dir="TIGRFAM_copynumbers_predicted.tsv">
+            <filter>"TIGRFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_copy_pheno_abund" label="${tool.name}: PHENO_copynumbers_predicted.tsv" from_work_dir="PHENO_copynumbers_predicted.tsv">
+            <filter>"PHENO" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_ec_abund" label="${tool.name}:  frogsfunc_functions_unstrat_EC.tsv" from_work_dir="frogsfunc_functions_unstrat_EC.tsv">
+            <filter>"EC" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_ko_abund" label="${tool.name}:  frogsfunc_functions_unstrat_KO.tsv" from_work_dir="frogsfunc_functions_unstrat_KO.tsv">
+            <filter>"KO" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_cog_abund" label="${tool.name}:  frogsfunc_functions_unstrat_COG.tsv" from_work_dir="frogsfunc_functions_unstrat_COG.tsv">
+            <filter>"COG" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_pfam_abund" label="${tool.name}:  frogsfunc_functions_unstrat_PFAM.tsv" from_work_dir="frogsfunc_functions_unstrat_PFAM.tsv">
+            <filter>"PFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_tigrfam_abund" label="${tool.name}:  frogsfunc_functions_unstrat_TIGRFAM.tsv" from_work_dir="frogsfunc_functions_unstrat_TIGRFAM.tsv">
+            <filter>"TIGRFAM" in functions</filter>
+        </data>
+        <data format="tsv" name="output_function_pheno_abund" label="${tool.name}:  frogsfunc_functions_unstrat_PHENO.tsv" from_work_dir="frogsfunc_functions_unstrat_PHENO.tsv">
+            <filter>"PHENO" in functions</filter>
+        </data>
     </outputs>
 
 
@@ -70,11 +150,11 @@
             <param name="min_samples" value="1" />
             <param name="strat" value="false" />
 
-            <output name="function_abund" file="references/27-frogsfunc_functions_unstrat.tsv" compare="diff" lines_diff="0" />
-            <output name="seqtab" file="references/27-frogsfunc_functions_marker_norm.tsv" compare="diff" lines_diff="0" />
-            <output name="weighted" file="references/27-frogsfunc_functions_weighted_nsti.tsv" compare="diff" lines_diff="0" />
+            <output name="output_function_abund" file="references/27-frogsfunc_functions_unstrat.tsv" compare="diff" lines_diff="0" />
+            <output name="output_otu_norm" file="references/27-frogsfunc_functions_marker_norm.tsv" compare="diff" lines_diff="0" />
+            <output name="output_weighted" file="references/27-frogsfunc_functions_weighted_nsti.tsv" compare="diff" lines_diff="0" />
             <output name="summary_file" file="references/27-frogsfunc_functions_report.html" compare="diff" lines_diff="0" />
-            <output name="excluded" file="references/27-frogsfunc_functions_excluded.txt" compare="diff" lines_diff="0" />
+            <output name="output_excluded" file="references/27-frogsfunc_functions_excluded.txt" compare="diff" lines_diff="0" />
         </test>
     </tests>
 
@@ -86,13 +166,18 @@
 
 What it does
 
-Predicting of functions weighted by the relative abundance of OTUs in the community. Inferring the metagenomes of the communities with `PICRUSt2 &lt;https://github.com/picrust/picrust2&gt;`_.
-There are two steps performed at this stage:
+FROGSFUNC_2_functions is the second step of PICRUSt2. It is able to predict:
+	(i) Functional abundances based solely on the sequences of marker genes with PICRUSt2. The available marker genes are 16S, ITS and 18S.
+
+	(ii) Functions, weighted by the relative abundance of ASVs in the community. Inferring the metagenomes of the communities with `PICRUSt2 &lt;https://github.com/picrust/picrust2&gt;`_.
+
 
-    (i) The read depth per OTU is divided by the predicted marker (16S/ITS/18S) copy numbers. This is performed to help control for variation in marker copy numbers across organisms, which can result in interpretation issues.
-        For instance, imagine an organism with five identical copies of the 16S gene that is at the same absolute abundance as an organism with one 16S gene. The OTU corresponding to the first organism would erroneously be inferred to be at higher relative abundance simply because this organism had more copies of the 16S gene.
-         
-    (ii) The OTU read depths per sample (after normalizing by marker (16S/ITS/18S) copy number) are multiplied by the predicted function copy numbers per OTU.         
+There are three steps performed at this stage:
+	(i) It runs hidden-state prediction (hsp) with castor-R to predict the function abundances of each ASV placed in the PICRUSt2 reference phylogenetic tree (FROGSFUNC_1_placeseqs_copynumber outputs).
+	
+    (ii) The read depth per ASV is divided by the predicted marker (16S/ITS/18S) copy numbers. This is performed to help control for variation in marker copy numbers across organisms, which can result in interpretation issues. For instance, imagine an organism with five identical copies of the 16S gene that is at the same absolute abundance as an organism with one 16S gene. The ASV corresponding to the first organism would erroneously be inferred to be at higher relative abundance simply because this organism had more copies of the 16S gene.
+        
+    (iii) The ASV read depths per sample (after normalizing by marker (16S/ITS/18S) copy number) are multiplied by the predicted function copy numbers per ASV.         
 
 
 .. class:: infomark page-header h2
@@ -105,65 +190,132 @@
 Inputs
 
 
-**Biom file**:
+**-Biom file-**:
+
+The ASVs biom file from FROGSFUNC_1_placeseqs_copynumber tool (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_). (FROGSFUNC_1_placeseqs_copynumber.biom from FROGSFUNC_1_placeseqs_copynumber)
+
+**-Sequence file-**:
 
- The OTUs biom file from FROGSFUNC_step1_placeseqs tool (format `biom1 &lt;http://biom-format.org/documentation/format_versions/biom-1.0.html&gt;`_). (frogsfunc_placeseqs.biom)
+The sequence file of the ASVs inserted into the PICRUSt2 reference tree (frogsfunc_placeseqs.fasta from the FROGSFUNC_1_placeseqs_copynumber step).
+
+**-Tree file (format newick nwk)-**:
+
+The file contains the tree information from the FROGSFUNC_1_placeseqs_copynumber step (FROGSFUNC_1_placeseqs_copynumber output: FROGSFUNC_1_placeseqs_copynumber_tree.nwk)
 
-**Function file**:
+**-Marker file-**:
+
+Output table of predicted marker gene copy numbers per sequence. (frogsfunc_marker.tsv from FROGSFUNC_1_placeseqs_copynumber step)
+
+.. class:: h3
 
- The table of predicted function abundance from FROGSFUNC_step2_copynumbers tool. (frogsfunc_copynumbers_predicted_functions.tsv)
+Parameters
+
+
+**-Taxonomic marker-**:
+
+Marker gene to be analyzed from the previous FROGSFUNC_1_placeseqs_copynumber step (frogsfunc_marker.tsv from FROGSFUNC_1_placeseqs_copynumber).
+
+**-Target function database-**:
 
-**Marker file**:
+Which default pre-calculated count table to use ?
+ - For 16S rRNA gene you can choose between: 'EC', 'KO', 'PFAM', 'COG', 'TIGRFAM', and/or 'PHENO'. You must select at least 'EC' or 'KO' because the next FROGSFUNC tools require the information from Metacyc (EC) or KEGG (KO).
+ - For ITS and 18S markers, only 'EC' is available.
+
+For more information about the different databases:
 
- Output table of predicted marker gene copy numbers per sequence from FROGSFUNC_step2_copynumbers tool. (frogsfunc_copynumbers_marker.tsv)
+ - EC : https://enzyme.expasy.org/
+ - KO : https://www.genome.jp/kegg/ko.html
+ - PFAM : http://pfam.xfam.org/
+ - COG : https://www.ncbi.nlm.nih.gov/research/cog-project/
+ - TIGRFAM : https://tigrfams.jcvi.org/cgi-bin/index.cgi
+ - PHENO : https://phenodb.org/
+
+**-NSTI cut-off-**:
 
-**NSTI cut-off**:
+ Nearest Sequenced Taxon Index (`NSTI &lt;https://www.nature.com/articles/nbt.2676&gt;`_) is the phylogenetic distance between the ASV and the nearest sequenced reference genome. This metric can be used to identify ASVs that are highly distant from all reference sequences (the predictions for these sequences are less reliable!). The higher the NSTI score, the less the affiliations are relevant. Any ASVs with a NSTI value higher than 2 are typically either from uncharacterized phyla or off-target sequences.
+
+**-Identity alignment cut-off-**:
+
+ All sequences whose alignment identity percentage against the closest PICRUSt2 reference sequence is lower than this value will be excluded (between 0 and 1).
 
- Nearest Sequenced Taxon Index (`NSTI &lt;https://www.nature.com/articles/nbt.2676&gt;`_) is the phylogenetic distance between the OTU and the nearest sequenced reference genome. This metric can be used to identify OTUs that are highly distant from all reference sequences (the predictions for these sequences are less reliable!). The higher the NSTI score, the less the affiliations are relevant. Any OTUs with a NSTI value higher than 2 are typically either from uncharacterized phyla or off-target sequences. 
+**-Coverage alignment cut-off-**:
+ 
+ All sequences whose alignment coverage percentage against the closest PICRUSt2 reference sequence is lower than this value will be excluded (between 0 and 1).
+
+**-HSP method-**:
+
+ Hidden-state prediction method to use.
+
          
-
 .. class:: h3
 
 Outputs
 
-**Report file**: (report.html)
+**-Fasta file-**:
+
+ Sequence file without excluded ASVs (NSTI, blast perc identity or blast perc coverage thresholds). (FROGSFUNC_2_functions.fasta)
+
+**-ASV abundance Biom file - one per chosen target function database (EC, KO, PFAM, COG, TIGRFAM,PHENO)-**:
+
+ ASV abundance data in a biom file without excluded ASVs (NSTI, %identity or %coverage alignment thresholds). (FROGSFUNC_2_functions.biom)
+
+**-Function abundance file-**:
+ 
+ It is the function abundance predictions of metagenome, per sample. (frogsfunc_functions_unstrat_DATABASENAME.tsv, for example: FROGSFUNC_2_functions_unstrat_EC.tsv)
+ 
+Table column description:
+ - classification: the hierarchy classification of the gene function. 
+ - db_link: the url on the link accession ID (*observation_name*) of the function. 
+ - observation_name: Accession identifier
+ - observation_sum: Total abundance of functions across all samples.
+ - last columns: Abundances of these functions in each sample.
+
+**-ASV normalized abundance table-**:
+
+ Table with normalized abundances per marker copy number from FROGSFUNC_1 step. (FROGSFUNC_2_functions_marker_norm.tsv)
+
+**-Weighted NSTI file-**:
+
+ Output file with the mean of NSTI value per sample. (FROGSFUNC_2_functions_weighted_nsti.tsv)
+
+**-Excluded sequences file-**:
+ 
+Information about removed sequences that have a NSTI value above the NSTI threshold chosen in this step:
+ - ASV: ASV name id.
+ - FROGS_taxonomy
+ - PICRUSt2_taxonomy
+ - exclusion_paramater: The parameter(s) that excluded the ASVs.
+ - value_parameter: The values associated with the parameter(s).
+
+**-Copy number marker file - one per chosen target function database (EC, KO, PFAM, COG, TIGRFAM,PHENO)-**:
+
+Output table of predicted function copy numbers per ASV. There are as many tables as chosen target function databases (EC, KO, PFAM, COG, TIGRFAM, PHENO) (example: FROGSFUNC_step3_functions: EC_copynumbers_predicted.tsv and FROGSFUNC_step3_functions: PHENO_copynumbers_predicted.tsv)
+
+**-Report file-**: (report.html)
 
 .. image:: FROGS_frogsfunc_functions_piechart.png
-    :height: 500
-    :width: 1352
+    :height: 375
+    :width: 1014
+
+ASVs are excluded if the associated NSTI is above the threshold, or if the alignment values are below the thresholds.
+
 
-OTUs are out if the NSTI associated is above the threshold.
+.. image:: FROGS_frogsfunc_functions_starplot.png
+    :height: 466
+    :width: 806
+
+Number of different taxonomic ranks before (green) and after (orange) application of the filters. 
+
 
 .. image:: FROGS_frogsfunc_functions_table.png
     :height: 580
-    :width: 1352
+    :width: 1452
  
 
 .. image:: FROGS_frogsfunc_functions_sunburst.png
 
 
-Gene families/function from KEGG or Metacyc databases are classified according to 3 hierarchy levels. The graph shows the proportion of each level within the selected samples.
-
-**Function abundance file**:
- 
- It is the function abundance predictions of metagenome, per sample. (frogsfunc_functions_unstrat.tsv)
- 
- - Classification column: the hierarchy classification of the gene function. 
- - db_link column: the url on the link accession ID (*observation_name*) of the function. 
- - observation_name: Accession identifier 
- - last columns: Abundances of these functions in each samples.
-
-**Excluded sequences**:
-
- Information (FROGS taxonomy, PICRUSt2 taxonomy, NSTI) about removed sequences that have a NSTI value aboved the NSTI threshold chosen in this step.
-
-**Normalized OTU abundance table**:
-
- Table with normalized abundance per marker copy number. (frogsfunc_functions_marker_norm.tsv)
-
-**Weighted NSTI file**:
-
- It is the table with average NSTI calculated per sample. (frogsfunc_functions_weighted_nsti.tsv)
+Gene families/function from KEGG or Metacyc databases are classified according to 4 hierarchy levels. The graph shows the proportion of each level within the selected samples.
author	oinizan
date	Thu, 30 Mar 2023 06:09:01 +0000
parents	ca1e9adbde51
children	57824202c333