comparison hyphy_gard.xml @ 35:69864510d68a draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author iuc
date Tue, 07 Oct 2025 20:42:01 +0000
parents 02b1656d82a1
children
comparison
equal deleted inserted replaced
34:02b1656d82a1 35:69864510d68a
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/> 6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 ln -s '$input_file' input.$input_file.extension && 9 @SYMLINK_FILES_NO_TREE@
10 #set $input_file = 'input.%s' % $input_file.extension 10 @HYPHYMPI@ gard
11 @HYPHYMPI@ gard 11 --alignment $input_file
12 --alignment ./$input_file
13 --type '$datatype.value' 12 --type '$datatype.value'
14 #if str($datatype.value) == 'codon': 13 #if str($datatype.value) == 'codon':
15 --code '$datatype.gencodeid' 14 --code '$datatype.gencodeid'
16 #elif str($datatype.value) == 'amino-acid': 15 #elif str($datatype.value) == 'amino-acid':
17 --model '$datatype.model' 16 --model '$datatype.model'
18 #end if 17 #end if
19 #if str($rate_cond.rate): 18 #if str($rate_cond.rate):
20 --rv '$rate_cond.rate' 19 --rv '$rate_cond.rate'
21 --rate-classes '$rate_cond.rate_classes' 20 --rate-classes '$rate_cond.rate_classes'
22 #end if 21 #end if
23 --output '$translated' 22 #if $advanced_options.max_breakpoints:
23 --max-breakpoints '$advanced_options.max_breakpoints'
24 #end if
25 #if $advanced_options.mode:
26 --mode '$advanced_options.mode'
27 #end if
28 ENV="TOLERATE_NUMERICAL_ERRORS=1;"
29 --output '$gard_output_json'
24 --output-lf '$gard_output' 30 --output-lf '$gard_output'
31 > gard_stdout.md
25 @ERRORS@ 32 @ERRORS@
26 ]]></command> 33 ]]></command>
27 <inputs> 34 <inputs>
28 <param name="input_file" type="data" format="fasta,fasta.gz,nex" label="Input FASTA or NEXUS file" /> 35 <param name="input_file" type="data" format="fasta,fasta.gz,nex" label="Input FASTA or NEXUS file" help="Input FASTA or NEXUS file." />
29 <conditional name="datatype"> 36 <conditional name="datatype">
30 <param argument="--type" name="value" type="select" label="Alignment kind"> 37 <param argument="--type" name="value" type="select" label="Alignment kind" help="Select the type of data to perform screening on.">
31 <option value="nucleotide">Nucleotide</option> 38 <option value="nucleotide">Nucleotide</option>
32 <option value="amino-acid">Amino acid</option> 39 <option value="amino-acid">Amino acid</option>
33 <option value="codon">Codon</option> 40 <option value="codon">Codon</option>
34 </param> 41 </param>
35 <when value="nucleotide"/> 42 <when value="nucleotide"/>
39 <when value="codon"> 46 <when value="codon">
40 <expand macro="gencode" /> 47 <expand macro="gencode" />
41 </when> 48 </when>
42 </conditional> 49 </conditional>
43 <conditional name="rate_cond"> 50 <conditional name="rate_cond">
44 <param argument="--rv" name="rate" type="select" label="Rate variation"> 51 <param argument="--rv" name="rate" type="select" label="Rate variation" help="Specify how site-to-site rate variation should be modeled.">
45 <option value="">None</option> 52 <option value="">None</option>
46 <option value="GDD">General Discrete</option> 53 <option value="GDD">General Discrete</option>
47 <option value="Gamma">Beta-Gamma</option> 54 <option value="Gamma">Beta-Gamma</option>
48 </param> 55 </param>
49 <when value=""/> 56 <when value=""/>
50 <when value="GDD"> 57 <when value="GDD">
51 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" /> 58 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" help="The number of discrete rate classes to use for modeling site-to-site rate variation." />
52 </when> 59 </when>
53 <when value="Gamma"> 60 <when value="Gamma">
54 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" /> 61 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" help="The number of discrete rate classes to use for modeling site-to-site rate variation." />
55 </when> 62 </when>
56 </conditional> 63 </conditional>
64 <section name="advanced_options" title="Advanced Options" expanded="false">
65 <param argument="--max-breakpoints" type="integer" value="10000" min="1" max="10000" label="Maximum number of breakpoints to consider" help="The maximum number of breakpoints the genetic algorithm will consider during its search."/>
66 <param argument="--mode" type="select" label="Run mode" help="Select the run mode for GARD. 'Normal' uses default optimization and convergence settings, while 'Faster' reduces precision and relaxes convergence for quicker results.">
67 <option value="Normal">Normal</option>
68 <option value="Faster">Faster</option>
69 </param>
70 </section>
57 </inputs> 71 </inputs>
58 <outputs> 72 <outputs>
59 <data name="gard_output" format="nex" /> 73 <data name="gard_output" format="nex" />
60 <data name="translated" format="hyphy_results.json" label="${tool.name} on ${on_string}: Translated" /> 74 <data name="gard_output_json" format="hyphy_results.json" label="${tool.name} on ${on_string}: gard_output_json" />
75 <data name="gard_md_report" format="markdown" from_work_dir="gard_stdout.md" label="GARD Report (Markdown) for ${tool.name} on ${on_string}" />
61 </outputs> 76 </outputs>
62 <tests> 77 <tests>
63 <test> 78 <test expect_num_outputs="3">
64 <param name="input_file" ftype="fasta" value="gard-in1.fa"/> 79 <param name="input_file" ftype="fasta" value="gard-in1.fa"/>
65 <output name="gard_output" file="gard-out1.nex" compare="sim_size"/> 80 <output name="gard_output" file="gard-out1.nex" compare="sim_size"/>
66 <output name="translated" file="gard-out1.json" compare="sim_size"/> 81 <output name="gard_output_json">
82 <assert_contents>
83 <has_text text='"potentialBreakpoints":21'/>
84 </assert_contents>
85 </output>
86 <output name="gard_md_report">
87 <assert_contents>
88 <has_text text="Done with 2 breakpoint analysis."/>
89 </assert_contents>
90 </output>
67 </test> 91 </test>
68 </tests> 92 </tests>
69 <help><![CDATA[ 93 <help><![CDATA[
70
71 GARD : Genetic Algorithms for Recombination Detection. 94 GARD : Genetic Algorithms for Recombination Detection.
72 ====================================================== 95 ======================================================
73 96
74 What does this do? 97 **What does this do?**
75 ------------------
76 98
77 This tools screens an alignment of sequences for evidence of recombination in one or more sequences. 99 This tool screens an alignment of sequences for evidence of recombination in one or more sequences.
78 The main idea is that if sufficient recombination has occurred, then no single phylogenetic tree will 100 The main idea is that if sufficient recombination has occurred, then no single phylogenetic tree will
79 properly fit the entire length of the alignment and instead a separate tree will be preferred for each *nonrecombinant* segment. 101 properly fit the entire length of the alignment and instead a separate tree will be preferred for each *nonrecombinant* segment.
80 102
81 Brief description 103 **Methodology**
82 -----------------
83 104
84 This analysis implements a heuristic approach to screening alignments of sequences for 105 GARD (Genetic Algorithm for Recombination Detection) implements a heuristic approach to screening alignments of sequences for recombination. It uses the CHC genetic algorithm (GA) to search for phylogenetic incongruence among different partitions of the data. The number of partitions is determined using a step-up procedure, while the placement of breakpoints is searched for with the GA. The best-fitting model (based on c-AIC) is returned, and additional post-hoc tests are run to distinguish topological incongruence from rate variation.
85 recombination, by using the CHC genetic algorithm (GA) to search for
86 phylogenetic incongruence among different partitions of the data. The
87 number of partitions is determined using a step-up procedure, while the
88 placement of breakpoints is searched for with the GA. The best fitting
89 model (based on c-AIC) is returned; and additional post-hoc tests run to
90 distinguish topological incongruence from rate-variation.
91 106
92 For each identified breakpoint, the support for its placement is calculated, and for each 107 **The Intuition**
93 non-recombinant fragment, a phylogenetic tree is inferred (using neighbor joining) and returned.
94 108
95 Input 109 Imagine you have a long DNA sequence, and you suspect that different parts of this sequence might have evolved under different evolutionary histories due to recombination events. If you try to build a single phylogenetic tree for the entire sequence, it might not accurately represent the relationships between the organisms.
96 ----- 110
111 GARD addresses this by looking for "breakpoints" in the sequence where the evolutionary history changes. It uses a genetic algorithm to efficiently search for these breakpoints and then infers separate phylogenetic trees for each segment between the breakpoints. This allows for a more accurate understanding of the evolutionary history of recombinant sequences.
112
113 **Input**
97 114
98 A *FASTA* sequence alignment 115 A *FASTA* or *NEXUS* sequence alignment
99 116
100 Output 117 **Output**
101 ------
102 118
103 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf). 119 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).
104 120
105 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/GARD for an example) 121 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/GARD for an example)
106 122
123 A Markdown file with a summary of the analysis.
107 124
108 Tool options 125 **Further reading**
109 ------------ 126
127
128 **Tool options**
110 :: 129 ::
111 130
112 131
113 --type type of alignment to screen 132 --type type of alignment to screen
114 Nucleotide [default]. 133 Nucleotide [default].
128 Default value: Universal 147 Default value: Universal
129 148
130 --model The substitution model to use (for protein alignments). 149 --model The substitution model to use (for protein alignments).
131 default value: JTT 150 default value: JTT
132 151
133 --rv The discrete distribution to use for modeling site to site rate variation. 152 --rv Site to site rate variation.
134 153 None: Constant rates.
135 None [default] 154 Gamma: Unit mean gamma distribution discretized into N rates.
136 No rate variation. This is the fastest option in terms of run time, but 155 GDD: General discrete distribution on N rates.
137 using it can result in false positives if there is significant site-to-site
138 rate variation
139 GDD
140 Use the general discrete distribution on N bins
141 Beta-Gamma
142 Use a discretized gamma with weights partitioned by a discretized beta
143 (see doi.org/10.1093/molbev/msi009)
144 156
145 --rate-classes How many site rate classes to use (if GDD or Beta-Gamma are selected) 157 --rate-classes How many site rate classes to use (if GDD or Beta-Gamma are selected)
146 default value: 4 158 default value: 4
147 159
160 --max-breakpoints Maximum number of breakpoints to consider.
148 161
149 ]]></help> 162 --mode Run mode.
163 Normal: Default optimization and convergence settings.
164 Faster: Reduce individual optimization precision and relax convergence settings.
165
166 ]]>
167 </help>
150 <expand macro="citations"> 168 <expand macro="citations">
151 <citation type="doi">10.1093/molbev/msl051</citation> 169 <citation type="doi">10.1093/molbev/msl051</citation>
152 </expand> 170 </expand>
153 </tool> 171 </tool>