comparison macros.xml.orig @ 5:e55eb3d22f79 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit e0d4688a59e6eeba33adcfe803ac43d0bc2863e7"
author iuc
date Tue, 31 Aug 2021 07:56:53 +0000
parents
children
comparison
equal deleted inserted replaced
4:21bd1282d216 5:e55eb3d22f79
1 <?xml version="1.0"?>
2 <macros>
3 <!-- package requirement shared by the dada2 tools: bioconductor-dada2 at the version given by the token @DADA2_VERSION@; NOTE(review): the yield element is presumably replaced by the children of the expanding tool - confirm --> <xml name="requirements">
4 <requirements>
5 <requirement type="package" version="@DADA2_VERSION@">bioconductor-dada2</requirement>
6 <yield/>
7 </requirements>
8 </xml>
9 <!-- bio.tools cross reference for the dada2 tools (introduced by 449abf790 "add bio.tools ID") -->
10 <xml name="bio_tools">
11 <xrefs>
12 <xref type='bio.tools'>dada2</xref>
13 </xrefs>
14 </xml>
15
16 <!-- version of the bioconductor-dada2 package, referenced by the "requirements" macro;
17 the former merge conflict is resolved in favour of HEAD (1.20), the other side still carried 1.16 -->
18 <token name="@DADA2_VERSION@">1.20</token>
19 <!-- NOTE(review): presumably the wrapper revision that the tools combine with @DADA2_VERSION@ - confirm -->
20 <token name="@WRAPPER_VERSION@">0</token>
21 <!-- version string: the "version" line of R (lines containing GNU are dropped) followed by the version of the loaded dada2 package; stderr of the R call and lines containing "WARNING: " (any case) are discarded -->
22 <xml name="version_command">
23 <version_command><![CDATA[
24 echo $(R --version | grep version | grep -v GNU)", dada2 version" $(R --vanilla --slave -e "library(dada2); cat(sessionInfo()\$otherPkgs\$dada2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
25 ]]></version_command>
26 </xml>
27 <!-- stderr patterns of failed R memory allocations; both are reported with the level fatal_oom -->
28 <xml name="stdio">
29 <stdio>
30 <regex match="Error: cannot allocate" source="stderr" level="fatal_oom" description="Out of memory error occurred" />
31 <regex match="'Calloc' could not allocate memory" source="stderr" level="fatal_oom" description="Out of memory error occurred" />
32 </stdio>
33 </xml>
34 <!-- DOI citation shared through this macro; NOTE(review): the yield element presumably lets an expanding tool append further citations - confirm -->
35 <xml name="citations">
36 <citations>
37 <citation type="doi">10.1038/nmeth.3869</citation>
38 <yield/>
39 </citations>
40 </xml>
41 <!-- NOTE(review): comma separated dada2 datatype names, presumably used as format list of inputs accepting dada or mergepairs results - confirm -->
42 <token name="@DADA_UNIQUES@">dada2_dada,dada2_mergepairs</token>
43
44 <!-- function to read dada2 data types (the Cheetah function read_data emits the R expression matching the datatype of the dataset)
45 - dada, mergepairs and all other types are simply read as RDS, plain tabular data is read with read.table (first column as row names)
46 - sequence_table is a named integer matrix (rows=samples, columns=ASVs), the table read from the file is transposed
47 - uniques is a named integer vector (columns=ASVs, only one row), read.uniques builds it from a headerless two column table (name, count)-->
48 <token name="@READ_FOO@"><![CDATA[
49 read.uniques <- function ( fname ) {
50 p <- read.table(fname, header=F, sep="\t")
51 n <- setNames(p[,2], as.character(p[,1]))
52 return(n)
53 }
54 #def read_data($dataset)
55 #if $dataset.is_of_type('dada2_sequencetable')
56 t(as.matrix( read.table('$dataset', header=T, sep="\t", row.names=1) ))
57 #else if $dataset.is_of_type('dada2_uniques')
58 read.uniques('$dataset')
59 #else if $dataset.is_of_type('tabular')
60 read.table('$dataset', header=T, sep="\t", row.names=1)
61 #else
62 readRDS('$dataset')
63 #end if
64 #end def
65 ]]></token>
66 <!-- function to write dada2 data types (write.data writes the content of its argument 'data' to the file 'fname', the format is chosen by 'type')
67 - dada, and mergepairs (and every other type) are written as RDS
68 - sequence_table is a named integer matrix (rows=samples, columns=ASVs), it is written transposed with a header line
69 - uniques is a named integer vector (columns=ASVs, only one row), it is written as headerless table of names and values-->
70 <token name="@WRITE_FOO@"><![CDATA[
71 write.data <- function( data, fname, type ){
72 if( type == 'dada2_uniques'){
73 write.table(data, file = fname, quote = F, sep = "\t", row.names = T, col.names = F)
74 }else if( type== 'dada2_sequencetable'){
75 write.table(t(data), file=fname, quote=F, sep="\t", row.names = T, col.names = NA)
76 }else{
77 saveRDS(data, file=fname)
78 }
79 }
80 ]]></token>
81 <!-- read input selector: a paired collection, two separate data sets (forward in "reads", reverse in "sdaer") or single end data; MULTIPLE, COLLECTION_TYPE, ARGUMENT_FWD and ARGUMENT_REV are macro tokens that default to the empty string -->
82 <xml name="fastq_input" token_multiple="" token_collection_type="" token_argument_fwd="" token_argument_rev="">
83 <conditional name="paired_cond">
84 <param name="paired_select" type="select" label="Paired reads">
85 <option value="paired">paired - in a data set pair</option>
86 <option value="separate">paired - in two separate data sets</option>
87 <option value="single">single</option>
88 </param>
89 <when value="paired">
90 <param name="reads" argument="@ARGUMENT_FWD@/@ARGUMENT_REV@" type="data_collection" collection_type="@COLLECTION_TYPE@" format="fastq,fastq.gz" label="Paired short read data"/>
91 </when>
92 <when value="separate">
93 <param name="reads" argument="@ARGUMENT_FWD@" type="data" format="fastq,fastq.gz" multiple="@MULTIPLE@" label="Forward read data"/>
94 <param name="sdaer" argument="@ARGUMENT_REV@" type="data" format="fastq,fastq.gz" multiple="@MULTIPLE@" label="Reverse read data"/>
95 </when>
96 <when value="single">
97 <param name="reads" argument="@ARGUMENT_FWD@" type="data" format="fastq,fastq.gz" multiple="@MULTIPLE@" label="Short read data"/>
98 </when>
99 </conditional>
100 </xml>
101
102 <!-- trimming parameters for filterAndTrim: truncQ, trimLeft, trimRight and truncLen, grouped in the section "trim" -->
103 <xml name="trimmers">
104 <section name="trim" title="Trimming parameters">
105 <param argument="truncQ" type="integer" value="2" min="0" label="Truncate reads at quality threshold" help="Truncate reads at the first instance of a quality score less than or equal to this threshold"/>
106 <param argument="trimLeft" type="integer" value="0" min="0" label="Trim start of each read" help="The number of nucleotides to remove from the start of each read."/>
107 <param argument="trimRight" type="integer" value="0" min="0" label="Trim end of each read" help="The number of nucleotides to remove from the end of each read"/>
108 <param argument="truncLen" type="integer" value="0" min="0" label="Truncate read length" help="Truncate reads after this amount of bases. Reads shorter than this are discarded. (default 0: no truncation)"/>
109 </section>
110 </xml>
111 <!-- filtering parameters (section "filter"): maxLen, minLen, maxN, minQ and maxEE; maxLen and maxEE are optional and empty by default; NOTE(review): presumably passed to filterAndTrim like the trimming parameters - confirm --> <xml name="filters">
112 <section name="filter" title="Filtering parameters">
113 <param argument="maxLen" type="integer" value="" optional="true" min="0" label="Remove long reads" help="Remove reads with length greater than this value. Default: no length threshold"/>
114 <param argument="minLen" type="integer" value="20" min="0" label="Remove short reads" help="Remove reads with length less than this value. Default: 20"/>
115 <param argument="maxN" type="integer" value="0" min="0" label="Remove reads with more Ns" help="Note that some of the subsequent dada pipeline steps do not allow Ns"/>
116 <param argument="minQ" type="integer" value="0" min="0" label="Remove low quality reads" help="Reads that contain a quality score less than this value will be discarded"/>
117 <param argument="maxEE" type="integer" value="" optional="true" min="0" label="Remove reads by number expected errors" help="Reads with a higher number of expected errors (EE) will be discarded, where EE = sum(10^(-Q_i/10)) and Q_i are the nominal quality scores at the read positions"/>
118 </section>
119 </xml>
120 <!-- select parameter "errfoo" offering the error estimation functions loessErrfun, noqualErrfun and PacBioErrfun -->
121 <xml name="errorEstimationFunction">
122 <param name="errfoo" argument="errorEstimationFunction" type="select" label="Error function">
123 <option value="loessErrfun">loess: Use a loess fit to estimate error rates from transition counts</option>
124 <option value="noqualErrfun">noqual: Estimate error rates for each type of transition while ignoring quality scores.</option>
125 <option value="PacBioErrfun">PacBio: Estimate error rates from transition counts in PacBio CCS data.</option>
126 </param>
127 </xml>
128 <token name="@HELP_OVERVIEW@"><![CDATA[
129 Overview
130 ........
131
132 The intended use of the dada2 tools for paired sequencing data is shown in the following image.
133
134 .. image:: pairpipe.png
135
136 Note: In particular for the analysis of paired collections the collections should be sorted lexicographically
137 before the analysis.
138
139 For single end data the steps "Unzip collection" and "mergePairs" are not necessary.
140
141 More information may be found on the dada2 homepage: https://benjjneb.github.io/dada2/index.html (in particular tutorials) or the documentation of dada2's R package https://bioconductor.org/packages/release/bioc/html/dada2.html (in particular the pdf which contains the full documentation of all parameters)
142 ]]></token>
143 </macros>