Mercurial > repos > matthias > dada2_learnerrors
view macros.xml @ 8:af8d1ccbd153 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit 977f22125c9ad5c3c5560de8946017305c5633c1
author | matthias |
---|---|
date | Mon, 27 May 2019 13:21:48 -0400 |
parents | 382900945187 |
children | ef3ebaa70032 |
line wrap: on
line source
<?xml version="1.0"?>
<!-- Shared macros and tokens for the dada2 Galaxy tool wrappers. -->
<macros>
    <!-- Package requirement on bioconductor-dada2; callers may append more requirements via yield. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@DADA2_VERSION@">bioconductor-dada2</requirement>
            <yield/>
        </requirements>
    </xml>

    <token name="@DADA2_VERSION@">1.10.0</token>
    <token name="@WRAPPER_VERSION@">3</token>

    <!-- Prints the R version line followed by the installed dada2 package version. -->
    <xml name="version_command">
        <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", dada2 version" $(R --vanilla --slave -e "library(dada2); cat(sessionInfo()\$otherPkgs\$dada2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
        ]]></version_command>
    </xml>

    <!-- Citation of the dada2 publication; callers may append more citations via yield. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1038/nmeth.3869</citation>
            <yield/>
        </citations>
    </xml>

    <token name="@DADA_UNIQUES@">dada2_derep,dada2_dada,dada2_mergepairs</token>

    <!-- function to read dada2 data types
         - derep, dada, and mergepairs are simply read as RDS
         - sequence_table is a named integer matrix (rows=samples, columns=ASVs);
           the tabular file is transposed after reading
         - uniques is a named integer vector (columns=ASVs, only one row);
           read.uniques reads a headerless, tab separated, two column file and
           returns the second column named by the first column
         - any other tabular dataset is read as a table with a header line and
           row names taken from the first column -->
    <token name="@READ_FOO@"><![CDATA[
    read.uniques <- function ( fname ) {
        p <- read.table(fname, header=F, sep="\t")
        n <- p[,2]
        names(n) <- p[,1]
        n
    }
    #def read_data($dataset)
        #if $dataset.is_of_type('dada2_sequencetable')
            t(as.matrix( read.table('$dataset', header=T, sep="\t", row.names=1) ))
        #else if $dataset.is_of_type('dada2_uniques')
            read.uniques('$dataset')
        #else if $dataset.is_of_type('tabular')
            read.table('$dataset', header=T, sep="\t", row.names=1)
        #else
            readRDS('$dataset')
        #end if
    #end def
    ]]></token>

    <!-- function to write dada2 data types (the content of the R variable passed as 'data' is written)
         - derep, dada, and mergepairs are written as RDS
         - sequence_table is a named integer matrix (rows=samples, columns=ASVs);
           it is transposed before it is written as a tab separated table
         - uniques is a named integer vector (columns=ASVs, only one row);
           it is written as a headerless, tab separated table with the names in the first column -->
    <token name="@WRITE_FOO@"><![CDATA[
    write.data <- function( data, fname, type ){
        if( type == 'dada2_uniques'){
            write.table(data, file = fname, quote = F, sep = "\t", row.names = T, col.names = F)
        }else if( type== 'dada2_sequencetable'){
            write.table(t(data), file=fname, quote=F, sep="\t", row.names = T, col.names = NA)
        }else{
            saveRDS(data, file=fname)
        }
    }
    ]]></token>

    <!-- for filterAndTrim -->
    <xml name="trimmers">
        <section name="trim" title="Trimming parameters">
            <param argument="truncQ" type="integer" value="2" min="0" label="Truncate reads at quality threshold" help="Truncate reads at the first instance of a quality score less than or equal to this threshold"/>
            <param argument="trimLeft" type="integer" value="0" min="0" label="Trim start of each read" help="The number of nucleotides to remove from the start of each read."/>
            <param argument="trimRight" type="integer" value="0" min="0" label="Trim end of each read" help="The number of nucleotides to remove from the end of each read"/>
            <param argument="truncLen" type="integer" value="0" min="0" label="Truncate read length" help="Truncate reads after this amount of bases. Reads shorter than this are discarded. (default 0: no truncation)"/>
        </section>
    </xml>

    <xml name="filters">
        <section name="filter" title="Filtering parameters">
            <param argument="maxLen" type="integer" value="" optional="true" min="0" label="Remove long reads" help="Remove reads with length greater than this value. Default: no length threshold"/>
            <param argument="minLen" type="integer" value="20" min="0" label="Remove short reads" help="Remove reads with length less than this value. Default: 20"/>
            <param argument="maxN" type="integer" value="0" min="0" label="Remove reads with more Ns" help="Note that some of the subsequent dada pipeline steps do not allow Ns"/>
            <param argument="minQ" type="integer" value="0" min="0" label="Remove low quality reads" help="Reads contain a quality score less than this value will be discarded"/>
            <param argument="maxEE" type="integer" value="" optional="true" min="0" label="Remove reads by number expected errors" help="Reads with a higher number of expected errors (EE) will be discarded, where EE = sum(10^(-Q_i/10)), with Q are the nominal quality scores at the read positions"/>
        </section>
    </xml>

    <!-- Select parameter offering the error estimation functions by name. -->
    <xml name="errorEstimationFunction">
        <param name="errfoo" argument="errorEstimationFunction" type="select" label="Error function">
            <option value="loessErrfun">loess: Use a loess fit to estimate error rates from transition counts</option>
            <option value="noqualErrfun">noqual: Estimate error rates for each type of transition while ignoring quality scores.</option>
            <option value="PacBioErrfun">PacBio: Estimate error rates from transition counts in PacBio CCS data.</option>
        </param>
    </xml>

    <!-- reStructuredText help section shared by the tools; kept at column 0 because indentation is significant in reStructuredText. -->
    <token name="@HELP_OVERVIEW@"><![CDATA[
Overview
........

The intended use of the dada2 tools for paired sequencing data is shown in the following image.

.. image:: pairpipe.png

For single end data the steps "Unzip collection" and "mergePairs" are not necessary.

More information may be found on the dada2 homepage:: https://benjjneb.github.io/dada2/index.html (in particular tutorials) or the documentation of dada2's R package https://bioconductor.org/packages/release/bioc/html/dada2.html (in particular the pdf which contains the full documentation of all parameters)
    ]]></token>
</macros>