Mercurial > repos > bebatut > rdptools
comparison framebot.xml @ 0:73b3e2f98631 draft default tip
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/rdptools commit fa8135b9b1918785c3f3b3fb7325bfe031b44ec4
| author | bebatut |
|---|---|
| date | Mon, 16 Nov 2015 02:46:50 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:73b3e2f98631 |
|---|---|
| 1 <tool id="framebot" name="Framebot" version="0.1.0"> | |
| 2 <description> to correct frameshift and classify nearest neighbor</description> <!-- short tool summary --> | |
| 3 <!-- Software dependency of this tool: the rdptools package, version 2.0.2 --> | |
| 4 <requirements> | |
| 5 <requirement type="package" version="2.0.2">rdptools</requirement> | |
| 6 </requirements> | |
| 7 <!-- Exit-code rule: the range "1:" matches every exit code from 1 upward --> | |
| 8 <stdio> | |
| 9 <exit_code range="1:" /> | |
| 10 </stdio> | |
| 11 | |
| 12 <command><![CDATA[ | |
| 13 java -jar \${RDP_TOOLS_DIR}/FrameBot.jar framebot | |
| 14 -a $alignment_mode | |
| 15 -i $identity_cutoff | |
| 16 -k $knn | |
| 17 -l $length_cutoff | |
| 18 -t $transl_table -w $word_size | |
| 19 #if $no_metric_search.no_metric_search_test | |
| 20 -N | |
| 21 -e $no_metric_search.gap_ext_penalty | |
| 22 -f $no_metric_search.frameshift_penalty | |
| 23 -g $no_metric_search.gap_open_penalty | |
| 24 #else | |
| 25 -m $no_metric_search.max_radius | |
| 26 #end if | |
| 27 ## Result file stem: the from_work_dir names declared in the outputs section all start with it | |
| 28 -o output | |
| 29 ## Reference database: a history dataset, or column 2 of the data-table row whose column 0 matches the selection | |
| 30 #if str( $databases.databases_selector ) == 'history' | |
| 31 $databases.databases_name | |
| 32 #else | |
| 33 #set $data_table = dict([(_[0], _[2]) for _ in $databases.databases_input.input.options.tool_data_table.data]) | |
| 34 $data_table[$databases.databases_input.value] | |
| 35 #end if | |
| 36 ## Query sequence file comes last on the command line | |
| 37 $framebot_input_sequence_file | |
| 38 ]]></command> | |
| 39 | |
| 40 <inputs> | |
| 41 <param name="framebot_input_sequence_file" type="data" format="fasta" | |
| 42 label="Input sequence file" help=""/> | |
| 43 <!-- Reference database: an entry of the framebot_ref_gene_databases data table, or a fasta dataset from the history --> | |
| 44 <conditional name="databases"> | |
| 45 <param name="databases_selector" type="select" label="Databases to query" help=""> | |
| 46 <option value="cached" selected="true">Public reference gene databases</option> | |
| 47 <option value="history">Databases from your history</option> | |
| 48 </param> | |
| 49 <when value="cached"> | |
| 50 <param name="databases_input" label="Reference gene databases" type="select" display="radio"> | |
| 51 <options from_data_table="framebot_ref_gene_databases" /> | |
| 52 <validator type="no_options" message="Select at least one database"/> | |
| 53 </param> | |
| 54 </when> | |
| 55 <when value="history"> | |
| 56 <param name="databases_name" type="data" format="fasta" label="Reference gene database" | |
| 57 multiple="false" help=""/> | |
| 58 </when> | |
| 59 </conditional> | |
| 60 <!-- Alignment mode, rendered into the command line as the -a value --> | |
| 61 <param name="alignment_mode" type="select" display="radio" | |
| 62 label="Alignment mode" help=""> | |
| 63 <option value="glocal">Glocal</option> | |
| 64 <option value="local">Local</option> | |
| 65 <option value="global">Global</option> | |
| 66 </param> | |
| 67 <!-- Identity cutoff (-i), restricted to the range 0 to 1 --> | |
| 68 <param name="identity_cutoff" type="float" min="0" max="1" value="0.4" | |
| 69 label="Percent identity cutoff" help=""/> | |
| 70 <!-- Metric search disabled (-N): alignment penalties (-e, -f, -g) are exposed; enabled: the search radius (-m) --> | |
| 71 <conditional name="no_metric_search"> | |
| 72 <param name="no_metric_search_test" type='boolean' checked="true" label="Disable | |
| 73 metric search?" help="Provide fasta file of seeds instead of | |
| 74 index file"/> | |
| 75 <when value="true"> | |
| 76 <param name="gap_ext_penalty" type="integer" min="-10" max="0" | |
| 77 value="-1" label="Gap extension penalty" help=""/> | |
| 78 <param name="frameshift_penalty" type="integer" min="-20" max="0" | |
| 79 value="-10" label="Frameshift penalty" help=""/> | |
| 80 <param name="gap_open_penalty" type="integer" min="-20" max="0" | |
| 81 value="-10" label="Gap opening penalty" help=""/> | |
| 82 </when> | |
| 83 <when value="false"> | |
| 84 <param name="max_radius" type="integer" min="1" max="2147483647" | |
| 85 value="100" label="Maximum radius" help=""/> <!-- integer so -m receives a whole number rather than a decimal rendering such as 100.0 --> | |
| 86 </when> | |
| 87 </conditional> | |
| 88 <!-- Number of nearest targets (-k) and minimum length (-l) --> | |
| 89 <param name="knn" type="integer" min="0" max="100" value="10" | |
| 90 label="The top k closest protein targets" help=""/> | |
| 91 <param name="length_cutoff" type="integer" min="0" max="100" value="80" | |
| 92 label="Length cutoff in number of amino acids" help=""/> | |
| 93 <!-- NCBI translation table; table 11 is preselected --> | |
| 94 <param name="transl_table" type="select" display="radio" label="Protein | |
| 95 translation table to use" help="NCBI Translation Tables"> | |
| 96 <option value="1">Standard Code</option> | |
| 97 <option value="2">Vertebrate Mitochondrial Code</option> | |
| 98 <option value="3">Yeast Mitochondrial Code</option> | |
| 99 <option value="4">Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> | |
| 100 <option value="5">Invertebrate Mitochondrial Code</option> | |
| 101 <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear Code</option> | |
| 102 <option value="9">Echinoderm and Flatworm Mitochondrial Code</option> | |
| 103 <option value="10">Euplotid Nuclear Code</option> | |
| 104 <option value="11" selected="true">Bacterial, Archaeal and Plant Plastid Code</option> | |
| 105 <option value="12">Alternative Yeast Nuclear Code</option> | |
| 106 <option value="13">Ascidian Mitochondrial Code</option> | |
| 107 <option value="14">Alternative Flatworm Mitochondrial Code</option> | |
| 108 <option value="16">Chlorophycean Mitochondrial Code</option> | |
| 109 <option value="21">Trematode Mitochondrial Code</option> | |
| 110 <option value="22">Scenedesmus obliquus Mitochondrial Code</option> | |
| 111 <option value="23">Thraustochytrium Mitochondrial Code</option> | |
| 112 <option value="24">Pterobranchia Mitochondrial Code</option> | |
| 113 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> | |
| 114 </param> | |
| 115 <!-- Word size, limited to the range 3 to 6 --> | |
| 116 <param name="word_size" type="integer" min="3" max="6" value="4" | |
| 117 label="Word size used to find closest protein target" help=""/> | |
| 118 </inputs> | |
| 119 <!-- Outputs are collected from the working directory; file names start with the "output" stem passed to -o --> | |
| 120 <outputs> | |
| 121 <data format="txt" name="conserved_alignment_file" | |
| 122 from_work_dir="output_framebot.txt" | |
| 123 label="Conserved alignments to the nearest match of ${on_string} (Framebot)" /> | |
| 124 <data format="fasta" name="corr_nucl_output" | |
| 125 from_work_dir="output_corr_nucl.fasta" | |
| 126 label="Frameshift-corrected nucleotide sequences of ${on_string} (Framebot)" /> | |
| 127 <data format="fasta" name="corr_prot_output" | |
| 128 from_work_dir="output_corr_prot.fasta" | |
| 129 label="Frameshift-corrected protein sequences of ${on_string} (Framebot)" /> | |
| 130 <!-- Alignments and sequences that failed the length or identity cutoff --> | |
| 131 <data format="txt" name="failed_alignment_file" | |
| 132 from_work_dir="output_failed_framebot.txt" | |
| 133 label="Rejected alignments to the nearest match of ${on_string} (Framebot)" /> | |
| 134 <data format="fasta" name="failed_nucl_output" | |
| 135 from_work_dir="output_failed_nucl.fasta" | |
| 136 label="Non frameshift-corrected nucleotide sequences of ${on_string} (Framebot)" /> | |
| 137 </outputs> | |
| 138 | |
| 139 <tests> | |
| 140 <!-- Cached amoA reference database, metric search disabled, all other inputs at their declared defaults --> | |
| 141 <test> | |
| 142 <param name="framebot_input_sequence_file" value="framebot_input_sequence.fasta"/> | |
| 143 <conditional name="databases"> | |
| 144 <param name="databases_selector" value="cached" /> | |
| 145 <param name="databases_input" value="amoA_prot_ref" /> | |
| 146 </conditional> | |
| 147 <param name="alignment_mode" value="glocal" /> | |
| 148 <param name="identity_cutoff" value="0.4" /> | |
| 149 <conditional name="no_metric_search"> | |
| 150 <param name="no_metric_search_test" value="true" /> | |
| 151 <param name="gap_ext_penalty" value="-1"/> | |
| 152 <param name="frameshift_penalty" value="-10"/> | |
| 153 <param name="gap_open_penalty" value="-10"/> | |
| 154 </conditional> | |
| 155 <param name="knn" value="10" /> | |
| 156 <param name="length_cutoff" value="80"/> | |
| 157 <param name="transl_table" value="11"/> | |
| 158 <param name="word_size" value="4"/> | |
| 159 | |
| 160 <output name="conserved_alignment_file" file="framebot_conserved_alignment_file.txt"/> | |
| 161 <output name="corr_nucl_output" file="framebot_corr_nucl_output.fasta"/> | |
| 162 <output name="corr_prot_output" file="framebot_corr_prot_output.fasta"/> | |
| 163 <output name="failed_alignment_file" file="framebot_failed_alignment_file.txt"/> | |
| 164 <output name="failed_nucl_output" file="framebot_failed_nucl_output.fasta"/> | |
| 165 </test> | |
| 166 </tests> | |
| 167 | |
| 168 <help><![CDATA[ | |
| 169 | |
| 170 **What it does** | |
| 171 | |
| 172 RDP FrameBot is a frameshift correction and nearest neighbor classification tool for use with high-throughput amplicon sequencing. It uses a dynamic programming algorithm to align each query DNA sequence against a set of target protein sequences, produces frameshift-corrected protein and DNA sequences and an optimal global or local protein alignment. | |
| 173 More information on `Github repository <https://github.com/rdpstaff/Framebot>`_. | |
| 174 | |
| 175 ----- | |
| 176 | |
| 177 **Input** | |
| 178 | |
| 179 One protein reference fasta file or index file, and one DNA query fasta file are required. | |
| 180 | |
| 181 Several reference sets for a list of genes are available. | |
| 182 However, you can also provide your own set of reference sequences representative of the gene of interest (`http://fungene.cme.msu.edu <http://fungene.cme.msu.edu>`_ is a good resource). | |
| 183 The reference set must contain protein or DNA representative sequences of the gene target and should be compiled to have a good coverage of diversity of the gene family. FrameBot is significantly more accurate when the nearest target protein sequence (from the reference set) is at least 50% identical to the query read. Running FrameBot is computationally intensive in no-metric-search mode because it performs all-against-all comparisons between query DNA and the target protein sequences. Therefore we recommend limiting your reference set to 200 protein sequences for no-metric-search mode. The index metric-search mode gains more than 10-fold speedup by reducing the number of comparisons (see FrameBot citation). A larger DNA reference set can be used. | |
| 184 | |
| 185 ----- | |
| 186 | |
| 187 **Parameters** | |
| 188 | |
| 189 FrameBot accepts numerous parameters: | |
| 190 | |
| 191 - The alignment mode: glocal, local or global | |
| 192 - The minimum abundance for de-novo mode | |
| 193 - The maximum aa identity cutoff for de-novo mode | |
| 194 - The Percent identity cutoff | |
| 195 - The top k closest protein targets | |
| 196 - Length cutoff in number of amino acids | |
| 197 - Disable metric search (provide fasta file of seeds instead of index file) | |
| 198 - Result file name stem | |
| 199 - Disable the pre-filtering step for non-metric search | |
| 200 - Sequence quality data | |
| 201 - Protein translation table to use | |
| 202 - The word size used to find closest protein targets | |
| 203 - ... | |
| 204 | |
| 205 ----- | |
| 206 | |
| 207 **Output** | |
| 208 | |
| 209 The framebot step produces five output files: | |
| 210 | |
| 211 - the alignment to the nearest match satisfying the minimum length and protein identity cutoff. | |
| 212 - the frameshift-corrected nucleotide and protein sequences satisfying the minimum length and protein identity cutoff. | |
| 213 - the alignment to the nearest match that failed the minimum length and protein identity cutoff. | |
| 214 - a fasta file containing the nucleotide sequences that failed the minimum length and protein identity cutoff. | |
| 215 | |
| 216 ]]></help> | |
| 217 <!-- Publication to cite for this tool, referenced by DOI --> | |
| 218 <citations> | |
| 219 <citation type="doi">10.1128/mBio.00592-13</citation> | |
| 220 </citations> | |
| 221 </tool> |
