Mercurial > repos > matthias > dada2_learnerrors

diff macros.xml @ 5:9aeea74a1fc9 draft
planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit 990192685955e9cda0282e348c28ef6462d88a38
author: matthias
date: Sun, 05 May 2019 12:22:22 -0400
parents: 10141f4eaae9
children: f9040a48d23f
--- a/macros.xml	Mon Apr 29 09:48:19 2019 -0400
+++ b/macros.xml	Sun May 05 12:22:22 2019 -0400
@@ -25,23 +25,44 @@
 
     <token name="@DADA_UNIQUES@">dada2_derep,dada2_dada,dada2_mergepairs</token>
 
+    <!-- function to read dada2 data types
+         - derep, dada, and mergepairs are simply read as RDS 
+         - sequence_table is a named integer matrix (rows=samples, columns=ASVs)
+         - uniques is a named integer vector (columns=ASVs, only one row)-->
     <token name="@READ_FOO@"><![CDATA[
+    ## Read a dada2 'uniques' table: a headerless, tab-separated file whose
+    ## first column holds the names and whose second column holds the values.
+    ## Returns the second column as a vector named by the first column.
+    read.uniques <- function ( fname ) {
+         p <- read.table(fname, header=FALSE, sep="\t")
+         n <- p[,2]
+         names(n) <- p[,1]
+         ## the value of an assignment is its right-hand side, so return n explicitly
+         n
+    }
     #def read_data($dataset)
-        #if $dataset.is_of_type('dada2_derep')
-            readRDS('$dataset)
-        #else if $dataset.is_of_type('dada2_dada')
+        #if $dataset.is_of_type('dada2_sequencetable')
+            t(as.matrix( read.table('$dataset', header=T, sep="\t", row.names=1) ))
+        #else if $dataset.is_of_type('dada2_uniques')
+            read.uniques('$dataset')
+        #else if $dataset.is_of_type('tabular')
+            read.table('$dataset', header=T, sep="\t", row.names=1)
+        #else
             readRDS('$dataset')
-        #else if $dataset.is_of_type('dada2_sequencetable')
-            as.matrix( read.table('$dataset', header=T, sep="\t", row.names=1) )
-        #else if $dataset.is_of_type('dada2_mergepairs')
-              readRDS('$dataset')
-        #else if $dataset.is_of_type('tabular')
-              read.table('$dataset', header=T, sep="\t", row.names=1 )
-        #else
-            #raise Exception("error: unknown input type")
         #end if
     #end def
     ]]></token>
+    <!-- function to write dada2 data types (the content or the R variable 'out' is written)
+         - derep, dada, and mergepairs are written as RDS 
+         - sequence_table is a named integer matrix (rows=samples, columns=ASVs)
+         - uniques is a named integer vector (columns=ASVs, only one row)-->
+    <token name="@WRITE_FOO@"><![CDATA[
+## Write 'data' to the file 'fname' in the on-disk format selected by 'type':
+##  - 'dada2_uniques': tab-separated, row names kept, no header line
+##  - 'dada2_sequencetable': the transposed data, tab-separated, with row names
+##    and a header line (col.names = NA adds an empty header field for the
+##    row-name column)
+##  - anything else: serialized with saveRDS
+write.data <- function( data, fname, type ){
+    if( type == 'dada2_uniques'){
+        write.table(data, file = fname, quote = FALSE, sep = "\t", row.names = TRUE, col.names = FALSE)
+    }else if( type == 'dada2_sequencetable'){
+        write.table(t(data), file = fname, quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA)
+    }else{
+        saveRDS(data, file = fname)
+    }
+}
+    ]]></token> 
+
     <!-- for filterAndTrim -->
     <xml name="trimmers">
         <section name="trim" title="Trimming parameters">
@@ -63,9 +84,9 @@
 
     <xml name="errorEstimationFunction">
         <param name="errfoo" argument="errorEstimationFunction" type="select" label="Error function">
-            <option value="loessErrfun">loess</option>
-            <option value="noqualErrfun">noqual</option>
-            <option value="PacBioErrfun">PacBio</option>
+            <option value="loessErrfun">loess: Use a loess fit to estimate error rates from transition counts</option>
+            <option value="noqualErrfun">noqual: Estimate error rates for each type of transition while ignoring quality scores.</option>
+            <option value="PacBioErrfun">PacBio: Estimate error rates from transition counts in PacBio CCS data.</option>
         </param>
     </xml>
     <token name="@HELP_OVERVIEW@"><![CDATA[
author	matthias
date	Sun, 05 May 2019 12:22:22 -0400
parents	10141f4eaae9
children	f9040a48d23f