Mercurial > repos > iuc > hyphy_fubar

diff hyphy_fubar.xml @ 36:da919379e8e4 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author: iuc
date: Tue, 07 Oct 2025 20:40:57 +0000
parents: d44c0b7a6cb8
--- a/hyphy_fubar.xml	Thu Mar 02 15:08:29 2023 +0000
+++ b/hyphy_fubar.xml	Tue Oct 07 20:40:57 2025 +0000
@@ -13,30 +13,48 @@
             @INPUT_TREE@
             --code '$gencodeid'
             --method '$posteriorEstimationMethod.method'
-            --grid '$grid_points'
             @posteriorEstimationMethod_cmd@
-            --concentration_parameter '$concentration'
+            --grid '$advanced_options.grid_points'
+            --concentration_parameter '$advanced_options.concentration'
+            --non-zero $advanced_options.non_zero
+            --kill-zero-lengths $advanced_options.kill_zero_lengths
+            > fubar_stdout.md 
         @ERRORS@
     ]]></command>
     <inputs>
         <expand macro="inputs"/>
         <expand macro="gencode"/>
-        <param argument="--grid" name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" />
         <expand macro="conditional_posteriorEstimationMethod" />
-        <param argument="--concentration_parameter" name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" />
+        <section name="advanced_options" title="Advanced Options" expanded="false">
+            <param argument="--grid" name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" help="The number of grid points used to approximate the posterior distribution of dN and dS." />
+            <param argument="--concentration_parameter" name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" help="The concentration parameter of the Dirichlet prior on the grid weights." />
+            <param argument="--non-zero" type="boolean" truevalue="Yes" falsevalue="No" label="Enforce non-zero synonymous rates" help="Enforce non-zero synonymous rates on the grid. This is useful for calculating dN/dS ratios, as it prevents division by zero."/>
+            <expand macro="kill_zero_lengths_param"/>
+        </section>
 
     </inputs>
     <outputs>
         <data name="fubar_output" format="hyphy_results.json" />
+        <data name="fubar_md_report" format="markdown" from_work_dir="fubar_stdout.md" label="FUBAR Report (Markdown) for ${tool.name} on ${on_string}" />
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="2">
             <param name="input_file" ftype="fasta.gz" value="fubar-in1.fa.gz"/>
             <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx"/>
             <conditional name="posteriorEstimationMethod">
                 <param name="method" value="Variational-Bayes"/>
             </conditional>
-            <output name="fubar_output" file="fubar-out1.json" compare="sim_size"/>
+            <output name="fubar_output">
+                <assert_contents>
+                    <has_text text="Empiricial Bayes Factor for positive selection at a site"/>
+                </assert_contents>
+            </output>
+            <output name="fubar_md_report">
+                <assert_contents>
+                    <has_text text="Running an iterative zeroth order variational Bayes procedure to estimate the posterior mean of rate weights"/>
+                    <has_text text="### Tabulating site-level results"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
@@ -61,12 +79,17 @@
 Brief description
 -----------------
 
-Perform a Fast Unbiased AppRoximate Bayesian (FUBAR) analysis of a
-coding sequence alignment to determine whether some sites have been
-subject to pervasive purifying or diversifying selection. There are three methods
-for estimating the posterior distribution of
-grid weights: collapsed Gibbs MCMC (faster), 0-th order Variation
-Bayes approximation (fastest), full Metropolis-Hastings (slowest).
+FUBAR (Fast, Unconstrained Bayesian AppRoximation) is a Bayesian method for detecting site-specific positive and negative selection. It is designed to be fast and efficient, making it suitable for large datasets.
+
+The core idea behind FUBAR is to model the non-synonymous (dN) and synonymous (dS) substitution rates at each site in a codon alignment. The ratio of these rates (dN/dS, or omega) is a measure of the selective pressure acting on a site. An omega value greater than 1 indicates positive (diversifying) selection, a value less than 1 indicates negative (purifying) selection, and a value of 1 indicates neutral evolution.
+
+FUBAR uses a Bayesian approach to infer the posterior distribution of dN and dS at each site. It does this by discretizing the dN and dS rates into a grid of points and then using a Bayesian graphical model to infer the posterior probability of each grid point for each site. This approach is much faster than traditional MCMC-based methods, which require long run times to converge.
+
+FUBAR offers three different methods for estimating the posterior distribution:
+
+*   **Variational-Bayes:** A fast approximation method that is the recommended default.
+*   **Collapsed-Gibbs:** A collapsed Gibbs MCMC sampler of intermediate speed (faster than Metropolis-Hastings, slower than Variational-Bayes).
+*   **Metropolis-Hastings:** The original, slowest MCMC method.
 
 Input
 -----
@@ -97,33 +120,33 @@
 
     --code             Which genetic code to use
 
-    --grid             The number of grid points
-                        Smaller : faster
-                        Larger : more precise posterior estimation but slower
-                        default value: 20
+    --grid             The number of grid points used to approximate the posterior distribution of dN and dS. A larger grid will provide a more accurate approximation but will also be slower. The default value of 20 is a good compromise between speed and accuracy.
 
-    --method           Inference method to use
+    --method           The inference method to use for estimating the posterior distribution.
                             Variational-Bayes : 0-th order Variational Bayes approximation; fastest [default]
                             Metropolis-Hastings : Full Metropolis-Hastings MCMC algorithm; original method [slowest]
                             Collapsed-Gibbs  : Collapsed Gibbs sampler [intermediate speed]
 
 
-    --chains           How many MCMC chains to run (does not apply to Variational-Bayes)
+    --chains           The number of MCMC chains to run. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. A larger number of chains will provide a better exploration of the posterior distribution but will also be slower.
                             default value: 5
 
-    --chain-length     MCMC chain length (does not apply to Variational-Bayes)
+    --chain-length     The length of each MCMC chain. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. A longer chain will provide a better exploration of the posterior distribution but will also be slower.
                             default value: 2,000,000
 
-    --burn-in          MCMC chain burn in (does not apply to Variational-Bayes)
+    --burn-in          The number of samples to discard from the beginning of each MCMC chain. This is done to ensure that the chain has converged to the posterior distribution. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods.
                             default value: 1,000,000
 
-    --samples          MCMC samples to draw (does not apply to Variational-Bayes)
+    --samples          The number of samples to draw from each MCMC chain after the burn-in period. These samples are used to estimate the posterior distribution. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods.
                             default value: 1,000
 
     --concentration_parameter
-                        The concentration parameter of the Dirichlet prior
+                        The concentration parameter of the Dirichlet prior on the grid weights.
                         default value: 0.5
 
+    --non-zero          Enforce non-zero synonymous rates on the grid. This is useful for calculating dN/dS ratios, as it prevents division by zero.
+
+    --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency. This will not affect the results.
 
     ]]></help>
     <expand macro="citations">
author	iuc
date	Tue, 07 Oct 2025 20:40:57 +0000
parents	d44c0b7a6cb8
children