Mercurial > repos > bebatut > qiime
comparison assign_taxonomy.xml @ 0:c1bd0c560018 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
| author | bebatut |
|---|---|
| date | Tue, 02 Feb 2016 05:50:37 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c1bd0c560018 |
|---|---|
| 1 <tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1"> | |
| 2 | |
| 3 <description>Assign taxonomy to each sequence</description> | |
| 4 | |
| 5 <macros> | |
| 6 <import>macros.xml</import> | |
| 7 </macros> | |
| 8 | |
| 9 <expand macro="requirements" /> | |
| 10 | |
| 11 <command> | |
| 12 <![CDATA[ | |
| 13 assign_taxonomy.py -i $input_fasta_fp | |
| 14 | |
| 15 #if str($id_to_taxonomy_fp) != 'None': | |
| 16 -t $id_to_taxonomy_fp | |
| 17 #end if | |
| 18 | |
| 19 #if str($reference_seqs_fp) != 'None': | |
| 20 -r $reference_seqs_fp | |
| 21 #end if | |
| 22 | |
| 23 #if str($methodcond.assignment_method) == 'None': | |
| 24 -m uclust | |
| 25 #end if | |
| 26 | |
| 27 #if str($methodcond.assignment_method) != 'None': | |
| 28 -m $methodcond.assignment_method | |
| 29 #end if | |
| 30 | |
| 31 #if $methodcond.assignment_method == "rtax": | |
| 32 | |
| 33 #if $methodcond.single_ok: | |
| 34 --single_ok | |
| 35 #end if | |
| 36 | |
| 37 #if $methodcond.no_single_ok_generic: | |
| 38 --no_single_ok_generic | |
| 39 #end if | |
| 40 | |
| 41 #if str($methodcond.read_id_regex): | |
| 42 --read_id_regex=$methodcond.read_id_regex | |
| 43 #end if | |
| 44 | |
| 45 #if str($methodcond.amplicon_id_regex): | |
| 46 --amplicon_id_regex=$methodcond.amplicon_id_regex | |
| 47 #end if | |
| 48 | |
| 49 #if str($methodcond.header_id_regex): | |
| 50 --header_id_regex=$methodcond.header_id_regex | |
| 51 #end if | |
| 52 #end if | |
| 53 | |
| 54 #if $methodcond.assignment_method == "sortmerna": | |
| 55 | |
| 56 #if str($methodcond.sortmerna_db): | |
| 57 --sortmerna_db=$methodcond.sortmerna_db | |
| 58 #end if | |
| 59 | |
| 60 #if $methodcond.sortmerna_e_value: | |
| 61 --sortmerna_e_value=$methodcond.sortmerna_e_value | |
| 62 #end if | |
| 63 | |
| 64 #if $methodcond.sortmerna_coverage: | |
| 65 --sortmerna_coverage=$methodcond.sortmerna_coverage | |
| 66 #end if | |
| 67 | |
| 68 #if $methodcond.sortmerna_best_N_alignments: | |
| 69 --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments | |
| 70 #end if | |
| 71 | |
| 72 #if str($methodcond.sortmerna_threads): | |
| 73 --sortmerna_threads=$methodcond.sortmerna_threads | |
| 74 #end if | |
| 75 | |
| 76 #if $methodcond.min_consensus_fraction: | |
| 77 --min_consensus_fraction=$methodcond.min_consensus_fraction | |
| 78 #end if | |
| 79 | |
| 80 #if $methodcond.similarity: | |
| 81 --similarity=$methodcond.similarity | |
| 82 #end if | |
| 83 #end if | |
| 84 | |
| 85 #if $methodcond.assignment_method == "blast": | |
| 86 | |
| 87 #if str($methodcond.blast_db) != 'None': | |
| 88 -b \$BLAST_DB_NAME | |
| 89 #end if | |
| 90 | |
| 91 #if $methodcond.blast_e_value: | |
| 92 -e $methodcond.blast_e_value | |
| 93 #end if | |
| 94 #end if | |
| 95 | |
| 96 #if $methodcond.assignment_method == "rdp": | |
| 97 | |
| 98 #if $methodcond.confidence: | |
| 99 -c $methodcond.confidence | |
| 100 #end if | |
| 101 | |
| 102 #if $methodcond.rdp_max_memory: | |
| 103 --rdp_max_memory=$methodcond.rdp_max_memory | |
| 104 #end if | |
| 105 #end if | |
| 106 | |
| 107 #if $methodcond.assignment_method == "mothur": | |
| 108 | |
| 109 #if $methodcond.confidence: | |
| 110 -c $methodcond.confidence | |
| 111 #end if | |
| 112 #end if | |
| 113 | |
| 114 #if $methodcond.assignment_method == "uclust": | |
| 115 | |
| 116 #if $methodcond.min_consensus_fraction: | |
| 117 --min_consensus_fraction=$methodcond.min_consensus_fraction | |
| 118 #end if | |
| 119 | |
| 120 #if $methodcond.similarity: | |
| 121 --similarity=$methodcond.similarity | |
| 122 #end if | |
| 123 | |
| 124 #if $methodcond.uclust_max_accepts: | |
| 125 --uclust_max_accepts=$methodcond.uclust_max_accepts | |
| 126 #end if | |
| 127 #end if | |
| 128 -o assign_taxonomy_output | |
| 129 ]]> | |
| 130 </command> | |
| 131 | |
| 132 <inputs> | |
| 133 <param label="-i/--input_fasta_fp: path to the input fasta file" | |
| 134 name="input_fasta_fp" optional="False" type="data"/> | |
| 135 <param default="/home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt" | |
| 136 label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping | |
| 137 sequences to assigned taxonomy. Each assigned taxonomy is provided as | |
| 138 a semicolon-separated list. For assignment with rdp, each assigned | |
| 139 taxonomy must be exactly 6 levels deep. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt]" | |
| 140 name="id_to_taxonomy_fp" optional="True" type="data"/> | |
| 141 <param default="/home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/rep_set/99_otus.fasta" | |
| 142 label="-r/--reference_seqs_fp: Path to reference sequences. For | |
| 143 assignment with blast, these are used to generate a blast database. | |
| 144 For assignment with rdp, they are used as training sequences for the | |
| 145 classifier. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/rep_set/99_otus.fasta]" | |
| 146 name="reference_seqs_fp" optional="True" type="data"/> | |
| 147 | |
| 148 <conditional name="methodcond"> | |
| 149 <param label="-m/--assignment_method: Taxon assignment method, must be | |
| 150 one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]" | |
| 151 name="assignment_method" optional="False" type="select"> | |
| 152 <option selected="True" value="uclust">uclust</option> | |
| 153 <option value="rdp">rdp</option> | |
| 154 <option value="blast">blast</option> | |
| 155 <option value="rtax">rtax</option> | |
| 156 <option value="mothur">mothur</option> | |
| 157 <option value="sortmerna">sortmerna</option> | |
| 158 </param> | |
| 159 <when value="rtax"> | |
| 160 <param label="--single_ok: When classifying paired ends, allow | |
| 161 fallback to single-ended classification when the mate pair is | |
| 162 lacking (used for RTAX only). [default: False]" name="single_ok" | |
| 163 checked="False" type="boolean"/> | |
| 164 <param label="--no_single_ok_generic: When classifying paired ends, | |
| 165 do not allow fallback to single-ended classification when the | |
| 166 mate pair is overly generic (used for RTAX only). [default: False]" | |
| 167 name="no_single_ok_generic" checked="False" type="boolean"/> | |
| 168 <param default="\S+\s+(\S+)" label="--read_id_regex: Used to parse | |
| 169 the result of OTU clustering, to get the read_1_id for each | |
| 170 clusterID. The clusterID itself is assumed to be the first | |
| 171 field, and is not captured by the regex. (used for RTAX only). | |
| 172 [default: \S+\s+(\S+)]" name="read_id_regex" optional="True" | |
| 173 type="text"/> | |
| 174 <param default="(\S+)\s+(\S+?)\/" label="--amplicon_id_regex: Used | |
| 175 to parse the result of split_libraries, to get the ampliconID | |
| 176 for each read_1_id. Two groups capture read_1_id and ampliconID, | |
| 177 respectively. (used for RTAX only). [default: (\S+)\s+(\S+?)\/]" | |
| 178 name="amplicon_id_regex" optional="True" type="text"/> | |
| 179 <param default="\S+\s+(\S+?)\/" label="--header_id_regex: Used to | |
| 180 parse the result of split_libraries, to get the portion of the | |
| 181 header that RTAX uses to match mate pairs. The default uses | |
| 182 the amplicon ID, not including /1 or /3, as the primary key | |
| 183 for the query sequences. Typically this regex will be the | |
| 184 same as amplicon_id_regex, except that only the second group | |
| 185 is captured. (used for RTAX only). [default: \S+\s+(\S+?)\/]" | |
| 186 name="header_id_regex" optional="True" type="text"/> | |
| 187 </when> | |
| 188 <when value="sortmerna"> <!-- sortmerna-only options; "value" supplies each field's initial form value --> | |
| 189 <param label="--sortmerna_db: Pre-existing database to search | |
| 190 against when using sortmerna [default: None]" name="sortmerna_db" | |
| 191 optional="True" type="text"/> | |
| 192 <param value="1.0" label="--sortmerna_e_value: Maximum E-value | |
| 193 when clustering [default = 1.0]" name="sortmerna_e_value" | |
| 194 optional="True" type="float"/> | |
| 195 <param value="0.9" label="--sortmerna_coverage: Mininum percent | |
| 196 query coverage (of an alignment) to consider a hit, expressed | |
| 197 as a fraction between 0 and 1 [default: 0.9]" | |
| 198 name="sortmerna_coverage" optional="True" type="float"/> | |
| 199 <param value="5" label="--sortmerna_best_N_alignments: This option | |
| 200 specifies how many best alignments per read will be written | |
| 201 [default: 5]" name="sortmerna_best_N_alignments" optional="True" | |
| 202 type="integer"/> | |
| 203 <param value="1" label="--sortmerna_threads: Specify number of | |
| 204 threads to be used for sortmerna mapper which utilizes multithreading. | |
| 205 [default: 1]" name="sortmerna_threads" optional="True" | |
| 206 type="text"/> | |
| 207 <param value="0.51" label="--min_consensus_fraction: Minimum | |
| 208 fraction of database hits that must have a specific taxonomic | |
| 209 assignment to assign that taxonomy to a query, only used for | |
| 210 sortmerna and uclust methods [default: 0.51]" | |
| 211 name="min_consensus_fraction" optional="True" type="float"/> | |
| 212 <param value="0.9" label="--similarity: Minimum percent similarity | |
| 213 (expressed as a fraction between 0 and 1) to consider a database | |
| 214 match a hit, only used for sortmerna and uclust methods | |
| 215 [default: 0.9]" name="similarity" optional="True" type="float"/> | |
| 216 </when> | |
| 217 <when value="blast"> <!-- blast-only options; "value" supplies the initial form value --> | |
| 218 <param label="-b/--blast_db: Database to blast against. Must provide | |
| 219 either --blast_db or --reference_seqs_db for assignment with blast | |
| 220 [default: None]" name="blast_db" optional="True" type="data"/> | |
| 221 <param value="0.001" label="-e/--blast_e_value: Maximum e-value | |
| 222 to record an assignment, only used for blast method [default: | |
| 223 0.001]" name="blast_e_value" optional="True" type="float"/> | |
| 224 </when> | |
| 225 <when value="rdp"> <!-- rdp-only options; "value" supplies each field's initial form value --> | |
| 226 <param value="0.5" label="-c/--confidence: Minimum confidence to | |
| 227 record an assignment, only used for rdp and mothur methods | |
| 228 [default: 0.5]" name="confidence" optional="True" type="float"/> | |
| 229 <param value="4000" label="--rdp_max_memory: Maximum memory | |
| 230 allocation, in MB, for Java virtual machine when using the | |
| 231 rdp method. Increase for large training sets [default: 4000]" | |
| 232 name="rdp_max_memory" optional="True" type="integer"/> | |
| 233 </when> | |
| 234 <when value="mothur"> <!-- mothur-only option; "value" supplies the initial form value --> | |
| 235 <param value="0.5" label="-c/--confidence: Minimum confidence to | |
| 236 record an assignment, only used for rdp and mothur methods | |
| 237 [default: 0.5]" name="confidence" optional="True" type="float"/> | |
| 238 </when> | |
| 239 <when value="uclust"> <!-- uclust-only options; "value" supplies each field's initial form value --> | |
| 240 <param value="0.51" label="--min_consensus_fraction: Minimum | |
| 241 fraction of database hits that must have a specific taxonomic | |
| 242 assignment to assign that taxonomy to a query, only used for | |
| 243 sortmerna and uclust methods [default: 0.51]" name="min_consensus_fraction" | |
| 244 optional="True" type="float"/> | |
| 245 <param value="0.9" label="--similarity: Minimum percent similarity | |
| 246 (expressed as a fraction between 0 and 1) to consider a database | |
| 247 match a hit, only used for sortmerna and uclust methods [default: | |
| 248 0.9]" name="similarity" optional="True" type="float"/> | |
| 249 <param value="3" label="--uclust_max_accepts: Number of database | |
| 250 hits to consider when making an assignment, only used for uclust | |
| 251 method [default: 3]" name="uclust_max_accepts" optional="True" | |
| 252 type="integer"/> | |
| 253 </when> | |
| 254 </conditional> | |
| 255 </inputs> | |
| 256 | |
| 257 <outputs> | |
| 258 <data format="txt" from_work_dir="assign_taxonomy_output/*.log" | |
| 259 label="tax_assignements.log" name="tax_assignements.log"/> | |
| 260 <data format="txt" from_work_dir="assign_taxonomy_output/*.txt" | |
| 261 label="tax_assignements.txt" name="tax_assignements.txt"/> | |
| 262 </outputs> | |
| 263 | |
| 264 <tests> | |
| 265 <test> | |
| 266 </test> | |
| 267 </tests> | |
| 268 | |
| 269 <help><![CDATA[ | |
| 270 **What it does** | |
| 271 | |
| 272 Contains code for assigning taxonomy, using several techniques. | |
| 273 | |
| 274 Given a set of sequences, assign_taxonomy.py attempts to assign the taxonomy of each sequence. | |
| 275 Currently the methods implemented are assignment with BLAST, the RDP classifier, | |
| 276 RTAX, mothur, uclust, and sortmerna. The output of this step is an observation metadata | |
| 277 mapping file of input sequence identifiers (1st column of output file) to taxonomy | |
| 278 (2nd column) and quality score (3rd column). There may be method-specific information | |
| 279 in subsequent columns. | |
| 280 | |
| 281 Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in | |
| 282 the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs | |
| 283 (and other marker gene OTU collections), follow the "Resources" link from http://qiime.org. | |
| 284 After downloading and unzipping you can use the following files as -r and -t, where | |
| 285 <otus_dir> is the name of the new directory after unzipping the reference OTUs tgz | |
| 286 file. | |
| 287 | |
| 288 -r <otus_dir>/rep_set/97_otus.fasta | |
| 289 -t <otus_dir>/taxonomy/97_otu_taxonomy.txt | |
| 290 | |
| 291 The consensus taxonomy assignment implemented here is the most detailed lineage | |
| 292 description shared by 90% or more of the sequences within the OTU (this level of | |
| 293 agreement can be adjusted by the user). The full lineage information for each | |
| 294 sequence is one of the output files of the analysis. In addition, a conflict file | |
| 295 records cases in which a phylum-level taxonomy assignment disagreement exists | |
| 296 within an OTU (such instances are rare and can reflect sequence misclassification | |
| 297 within the greengenes database). | |
| 298 ]]> | |
| 299 </help> | |
| 300 | |
| 301 <citations> | |
| 302 <expand macro="citations" /> | |
| 303 </citations> | |
| 304 </tool> |
