Mercurial > repos > iuc > hyphy_gard
comparison hyphy_gard.xml @ 35:69864510d68a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
| author | iuc |
|---|---|
| date | Tue, 07 Oct 2025 20:42:01 +0000 |
| parents | 02b1656d82a1 |
| children |
comparison
equal
deleted
inserted
replaced
| 34:02b1656d82a1 | 35:69864510d68a |
|---|---|
| 4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="bio_tools"/> | 6 <expand macro="bio_tools"/> |
| 7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
| 8 <command detect_errors="exit_code"><![CDATA[ | 8 <command detect_errors="exit_code"><![CDATA[ |
| 9 ln -s '$input_file' input.$input_file.extension && | 9 @SYMLINK_FILES_NO_TREE@ |
| 10 #set $input_file = 'input.%s' % $input_file.extension | 10 @HYPHYMPI@ gard |
| 11 @HYPHYMPI@ gard | 11 --alignment $input_file |
| 12 --alignment ./$input_file | |
| 13 --type '$datatype.value' | 12 --type '$datatype.value' |
| 14 #if str($datatype.value) == 'codon': | 13 #if str($datatype.value) == 'codon': |
| 15 --code '$datatype.gencodeid' | 14 --code '$datatype.gencodeid' |
| 16 #elif str($datatype.value) == 'amino-acid': | 15 #elif str($datatype.value) == 'amino-acid': |
| 17 --model '$datatype.model' | 16 --model '$datatype.model' |
| 18 #end if | 17 #end if |
| 19 #if str($rate_cond.rate): | 18 #if str($rate_cond.rate): |
| 20 --rv '$rate_cond.rate' | 19 --rv '$rate_cond.rate' |
| 21 --rate-classes '$rate_cond.rate_classes' | 20 --rate-classes '$rate_cond.rate_classes' |
| 22 #end if | 21 #end if |
| 23 --output '$translated' | 22 #if $advanced_options.max_breakpoints: |
| 23 --max-breakpoints '$advanced_options.max_breakpoints' | |
| 24 #end if | |
| 25 #if $advanced_options.mode: | |
| 26 --mode '$advanced_options.mode' | |
| 27 #end if | |
| 28 ENV="TOLERATE_NUMERICAL_ERRORS=1;" | |
| 29 --output '$gard_output_json' | |
| 24 --output-lf '$gard_output' | 30 --output-lf '$gard_output' |
| 31 > gard_stdout.md | |
| 25 @ERRORS@ | 32 @ERRORS@ |
| 26 ]]></command> | 33 ]]></command> |
| 27 <inputs> | 34 <inputs> |
| 28 <param name="input_file" type="data" format="fasta,fasta.gz,nex" label="Input FASTA or NEXUS file" /> | 35 <param name="input_file" type="data" format="fasta,fasta.gz,nex" label="Input FASTA or NEXUS file" help="Input FASTA or NEXUS file." /> |
| 29 <conditional name="datatype"> | 36 <conditional name="datatype"> |
| 30 <param argument="--type" name="value" type="select" label="Alignment kind"> | 37 <param argument="--type" name="value" type="select" label="Alignment kind" help="Select the type of data to perform screening on."> |
| 31 <option value="nucleotide">Nucleotide</option> | 38 <option value="nucleotide">Nucleotide</option> |
| 32 <option value="amino-acid">Amino acid</option> | 39 <option value="amino-acid">Amino acid</option> |
| 33 <option value="codon">Codon</option> | 40 <option value="codon">Codon</option> |
| 34 </param> | 41 </param> |
| 35 <when value="nucleotide"/> | 42 <when value="nucleotide"/> |
| 39 <when value="codon"> | 46 <when value="codon"> |
| 40 <expand macro="gencode" /> | 47 <expand macro="gencode" /> |
| 41 </when> | 48 </when> |
| 42 </conditional> | 49 </conditional> |
| 43 <conditional name="rate_cond"> | 50 <conditional name="rate_cond"> |
| 44 <param argument="--rv" name="rate" type="select" label="Rate variation"> | 51 <param argument="--rv" name="rate" type="select" label="Rate variation" help="Specify how site-to-site rate variation should be modeled."> |
| 45 <option value="">None</option> | 52 <option value="">None</option> |
| 46 <option value="GDD">General Discrete</option> | 53 <option value="GDD">General Discrete</option> |
| 47 <option value="Gamma">Beta-Gamma</option> | 54 <option value="Gamma">Beta-Gamma</option> |
| 48 </param> | 55 </param> |
| 49 <when value=""/> | 56 <when value=""/> |
| 50 <when value="GDD"> | 57 <when value="GDD"> |
| 51 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" /> | 58 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" help="The number of discrete rate classes to use for modeling site-to-site rate variation." /> |
| 52 </when> | 59 </when> |
| 53 <when value="Gamma"> | 60 <when value="Gamma"> |
| 54 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" /> | 61 <param argument="--rate-classes" type="integer" value="2" min="2" max="6" label="Rate classes" help="The number of discrete rate classes to use for modeling site-to-site rate variation." /> |
| 55 </when> | 62 </when> |
| 56 </conditional> | 63 </conditional> |
| 64 <section name="advanced_options" title="Advanced Options" expanded="false"> | |
| 65 <param argument="--max-breakpoints" type="integer" value="10000" min="1" max="10000" label="Maximum number of breakpoints to consider" help="The maximum number of breakpoints the genetic algorithm will consider during its search."/> | |
| 66 <param argument="--mode" type="select" label="Run mode" help="Select the run mode for GARD. 'Normal' uses default optimization and convergence settings, while 'Faster' reduces precision and relaxes convergence for quicker results."> | |
| 67 <option value="Normal">Normal</option> | |
| 68 <option value="Faster">Faster</option> | |
| 69 </param> | |
| 70 </section> | |
| 57 </inputs> | 71 </inputs> |
| 58 <outputs> | 72 <outputs> |
| 59 <data name="gard_output" format="nex" /> | 73 <data name="gard_output" format="nex" /> |
| 60 <data name="translated" format="hyphy_results.json" label="${tool.name} on ${on_string}: Translated" /> | 74 <data name="gard_output_json" format="hyphy_results.json" label="${tool.name} on ${on_string}: gard_output_json" /> |
| 75 <data name="gard_md_report" format="markdown" from_work_dir="gard_stdout.md" label="GARD Report (Markdown) for ${tool.name} on ${on_string}" /> | |
| 61 </outputs> | 76 </outputs> |
| 62 <tests> | 77 <tests> |
| 63 <test> | 78 <test expect_num_outputs="3"> |
| 64 <param name="input_file" ftype="fasta" value="gard-in1.fa"/> | 79 <param name="input_file" ftype="fasta" value="gard-in1.fa"/> |
| 65 <output name="gard_output" file="gard-out1.nex" compare="sim_size"/> | 80 <output name="gard_output" file="gard-out1.nex" compare="sim_size"/> |
| 66 <output name="translated" file="gard-out1.json" compare="sim_size"/> | 81 <output name="gard_output_json"> |
| 82 <assert_contents> | |
| 83 <has_text text='"potentialBreakpoints":21'/> | |
| 84 </assert_contents> | |
| 85 </output> | |
| 86 <output name="gard_md_report"> | |
| 87 <assert_contents> | |
| 88 <has_text text="Done with 2 breakpoint analysis."/> | |
| 89 </assert_contents> | |
| 90 </output> | |
| 67 </test> | 91 </test> |
| 68 </tests> | 92 </tests> |
| 69 <help><![CDATA[ | 93 <help><![CDATA[ |
| 70 | |
| 71 GARD : Genetic Algorithms for Recombination Detection. | 94 GARD : Genetic Algorithms for Recombination Detection. |
| 72 ====================================================== | 95 ====================================================== |
| 73 | 96 |
| 74 What does this do? | 97 **What does this do?** |
| 75 ------------------ | |
| 76 | 98 |
| 77 This tools screens an alignment of sequences for evidence of recombination in one or more sequences. | 99 This tool screens an alignment of sequences for evidence of recombination in one or more sequences. |
| 78 The main idea is that if sufficient recombination has occurred, then no single phylogenetic tree will | 100 The main idea is that if sufficient recombination has occurred, then no single phylogenetic tree will |
| 79 properly fit the entire length of the alignment and instead a separate tree will be preferred for each *nonrecombinant* segment. | 101 properly fit the entire length of the alignment and instead a separate tree will be preferred for each *nonrecombinant* segment. |
| 80 | 102 |
| 81 Brief description | 103 **Methodology** |
| 82 ----------------- | |
| 83 | 104 |
| 84 This analysis implements a heuristic approach to screening alignments of sequences for | 105 GARD (Genetic Algorithm for Recombination Detection) implements a heuristic approach to screening alignments of sequences for recombination. It uses the CHC genetic algorithm to search for phylogenetic incongruence among different partitions of the data. The number of partitions is determined using a step-up procedure, while the placement of breakpoints is searched for with the GA. The best fitting model (based on c-AIC) is returned, and additional post-hoc tests are run to distinguish topological incongruence from rate-variation. |
| 85 recombination, by using the CHC genetic algorithm (GA) to search for | |
| 86 phylogenetic incongruence among different partitions of the data. The | |
| 87 number of partitions is determined using a step-up procedure, while the | |
| 88 placement of breakpoints is searched for with the GA. The best fitting | |
| 89 model (based on c-AIC) is returned; and additional post-hoc tests run to | |
| 90 distinguish topological incongruence from rate-variation. | |
| 91 | 106 |
| 92 For each identified breakpoint, the support for its placement is calculated, and for each | 107 **The Intuition** |
| 93 non-recombinant fragment, a phylogenetic tree is inferred (using neighbor joining) and returned. | |
| 94 | 108 |
| 95 Input | 109 Imagine you have a long DNA sequence, and you suspect that different parts of this sequence might have evolved under different evolutionary histories due to recombination events. If you try to build a single phylogenetic tree for the entire sequence, it might not accurately represent the relationships between the organisms. |
| 96 ----- | 110 |
| 111 GARD addresses this by looking for "breakpoints" in the sequence where the evolutionary history changes. It uses a genetic algorithm to efficiently search for these breakpoints and then infers separate phylogenetic trees for each segment between the breakpoints. This allows for a more accurate understanding of the evolutionary history of recombinant sequences. | |
| 112 | |
| 113 **Input** | |
| 97 | 114 |
| 98 A *FASTA* sequence alignment | 115 A *FASTA* or *NEXUS* sequence alignment |
| 99 | 116 |
| 100 Output | 117 **Output** |
| 101 ------ | |
| 102 | 118 |
| 103 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf). | 119 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf). |
| 104 | 120 |
| 105 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/GARD for an example) | 121 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/GARD for an example) |
| 106 | 122 |
| 123 A Markdown file with a summary of the analysis. | |
| 107 | 124 |
| 108 Tool options | 125 **Further reading** |
| 109 ------------ | 126 |
| 127 | |
| 128 **Tool options** | |
| 110 :: | 129 :: |
| 111 | 130 |
| 112 | 131 |
| 113 --type type of alignment to screen | 132 --type type of alignment to screen |
| 114 Nucleotide [default]. | 133 Nucleotide [default]. |
| 128 Default value: Universal | 147 Default value: Universal |
| 129 | 148 |
| 130 --model The substitution model to use (for protein alignments). | 149 --model The substitution model to use (for protein alignments). |
| 131 default value: JTT | 150 default value: JTT |
| 132 | 151 |
| 133 --rv The discrete distribution to use for modeling site to site rate variation. | 152 --rv Site to site rate variation. |
| 134 | 153 None: Constant rates. |
| 135 None [default] | 154 Gamma: Unit mean gamma distribution discretized into N rates. |
| 136 No rate variation. This is the fastest option in terms of run time, but | 155 GDD: General discrete distribution on N rates. |
| 137 using it can result in false positives if there is significant site-to-site | |
| 138 rate variation | |
| 139 GDD | |
| 140 Use the general discrete distribution on N bins | |
| 141 Beta-Gamma | |
| 142 Use a discretized gamma with weights partitioned by a discretized beta | |
| 143 (see doi.org/10.1093/molbev/msi009) | |
| 144 | 156 |
| 145 --rate-classes How many site rate classes to use (if GDD or Beta-Gamma are selected) | 157 --rate-classes How many site rate classes to use (if GDD or Beta-Gamma are selected) |
| 146 default value: 4 | 158 default value: 4 |
| 147 | 159 |
| 160 --max-breakpoints Maximum number of breakpoints to consider. | |
| 148 | 161 |
| 149 ]]></help> | 162 --mode Run mode. |
| 163 Normal: Default optimization and convergence settings. | |
| 164 Faster: Reduce individual optimization precision and relax convergence settings. | |
| 165 | |
| 166 ]]> | |
| 167 </help> | |
| 150 <expand macro="citations"> | 168 <expand macro="citations"> |
| 151 <citation type="doi">10.1093/molbev/msl051</citation> | 169 <citation type="doi">10.1093/molbev/msl051</citation> |
| 152 </expand> | 170 </expand> |
| 153 </tool> | 171 </tool> |
