comparison dada2_makeSequenceTable.xml @ 3:c3834c230b0a draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit 5b1603bbcd3f139cad5c876be83fcb39697b5613-dirty
author matthias
date Mon, 29 Apr 2019 09:00:48 -0400
parents d2e7c5f8a9f7
children ec4a183cc713
comparison
equal deleted inserted replaced
2:d2e7c5f8a9f7 3:c3834c230b0a
18 #end if 18 #end if
19 19
20 samples <- list() 20 samples <- list()
21 #for $s in $samples: 21 #for $s in $samples:
22 #if $len($samples) == 1 22 #if $len($samples) == 1
23 samples <- $read_data($s) 23 samples <- readRDS('$s')
24 #else 24 #else
25 samples[["$s.element_identifier"]] <- $read_data($s) 25 samples[["$s.element_identifier"]] <- readRDS('$s')
26 #end if 26 #end if
27 #end for 27 #end for
28 ## make sequence table 28 ## make sequence table
29 seqtab <- makeSequenceTable(samples, orderBy = "$orderby") 29 seqtab <- makeSequenceTable(samples, orderBy = "$orderBy")
30 30
31 31
32 reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) 32 reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum)
33 df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen) 33 df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen)
34 34
35 #if $plot == "yes" 35 #if $plot == "yes"
36 pdf( '$plot_output' ) 36 pdf( '$plot_output' )
37 ggplot(data=df, aes(x=length, y=count)) + 37 ggplot(data=df, aes(x=length, y=count)) +
38 geom_col() + 38 geom_col() +
39 #if $filter_cond.filter_select != "no" 39 #if $filter_cond.filter_select != "no"
40 geom_vline( xintercept=c($filter_cond.min-0.5, $filter_cond.max+0.5) ) + 40 geom_vline( xintercept=c($filter_cond.min-0.5, $filter_cond.max+0.5) ) +
41 #end if 41 #end if
42 theme_bw() 42 theme_bw()
43 bequiet <- dev.off() 43 bequiet <- dev.off()
44 #end if 44 #end if
45 45
51 write.table(seqtab, "$stable", quote=F, sep="\t", row.names = T, col.names = NA) 51 write.table(seqtab, "$stable", quote=F, sep="\t", row.names = T, col.names = NA)
52 ]]></configfile> 52 ]]></configfile>
53 </configfiles> 53 </configfiles>
54 <inputs> 54 <inputs>
55 <param name="samples" type="data" multiple="true" format="@DADA_UNIQUES@" label="samples" /> 55 <param name="samples" type="data" multiple="true" format="@DADA_UNIQUES@" label="samples" />
56 <param name="orderby" type="select" label="Column order"> 56 <param argument="orderBy" type="select" label="Column order">
57 <option value="abundance">abundance</option> 57 <option value="abundance">abundance</option>
58 <option value="nsamples">nsamples</option> 58 <option value="nsamples">nsamples</option>
59 </param> 59 </param>
60 <conditional name="filter_cond"> 60 <conditional name="filter_cond">
61 <param name="filter_select" type="select" label="Filter method"> 61 <param name="filter_select" type="select" label="Length filter method">
62 <option value="no">No filter</option> 62 <option value="no">No filter</option>
63 <option value="minmax">Specify minimum and maximum sequence lengths</option> 63 <option value="minmax">Specify minimum and maximum sequence lengths</option>
64 </param> 64 </param>
65 <when value="no"/> 65 <when value="no"/>
66 <when value="minmax"> 66 <when value="minmax">
74 <data name="stable" format="dada2_sequencetable" label="${tool.name} on ${on_string}"/> 74 <data name="stable" format="dada2_sequencetable" label="${tool.name} on ${on_string}"/>
75 <data name="plot_output" format="pdf" label="${tool.name} on ${on_string}: sequence length distribution"> 75 <data name="plot_output" format="pdf" label="${tool.name} on ${on_string}: sequence length distribution">
76 <filter>plot</filter> 76 <filter>plot</filter>
77 </data> 77 </data>
78 </outputs> 78 </outputs>
79 <tests>
80 <test>
81 <param name="samples" ftype="dada2_mergepairs" value="mergePairs_F3D0.Rdata"/>
82 <output name="stable" value="makeSequenceTable_F3D0.tab" ftype="dada2_sequencetable" />
83 </test>
84 </tests>
85 <help><![CDATA[
86 Description
87 ...........
79 88
80 <help><![CDATA[ 89 This function constructs a sequence table -- more precisely an amplicon sequence variant (ASV) table -- a higher-resolution version of the OTU table produced by traditional methods.
81 This function constructs a sequence table (analogous to an OTU table) from the provided list of
82 samples.
83 90
84 Custom Reference data sets 91 The sequence table is a matrix with rows corresponding to (and named by) the samples, and columns corresponding to (and named by) the sequence variants.
85 --------------------------
86 92
87 For ** taxonomy assignment ** the following is needed: 93 Usage
94 .....
88 95
89 - a reference fasta data base 96 **Input**: The result of derepFastq, dada, or mergePairs.
90 - a comma separated list of taxonomic ranks present in the reference data base
91 97
92 The reference fasta data base for taxonomic assignment (fasta or compressed fasta) needs to encode the taxonomy corresponding to each sequence in the fasta header lines in the following fashion (note, the second sequence is not assigned down to level 6): 98 **Output**: A data set of type dada2_sequencetable, i.e. a tabular data set with a row for each sample, and a column for each unique sequence across all the samples. The columns are named by the sequence.
93 99
94 :: 100 Details
101 .......
95 102
96 >Level1;Level2;Level3;Level4;Level5;Level6; 103 Sequences that are much longer or shorter than expected may be the result of non-specific priming. You can remove non-target-length sequences by applying a length filter. This is analogous to “cutting a band” in-silico to get amplicons of the targeted length.
97 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC
98 >Level1;Level2;Level3;Level4;Level5;
99 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC
100 104
101 The list of required taxonomic ranks could be for instance: "Kingdom,Phylum,Class,Order,Family,Genus" 105 @HELP_OVERVIEW@
102
103 The reference data base for ** species assignment ** is a fasta file (or compressed fasta file), with the id line formatted as follows:
104
105 ::
106
107 >ID Genus species
108 ACCTAGAAAGTCGTAGATCGAAGTTGAAGCATCGCCCGATGATCGTCTGAAGCTGTAGCATGAGTCGATTTTCACATTCAGGGATACCATAGGATAC
109 >ID Genus species
110 CGCTAGAAAGTCGTAGAAGGCTCGGAGGTTTGAAGCATCGCCCGATGGGATCTCGTTGCTGTAGCATGAGTACGGACATTCAGGGATCATAGGATAC
111 ]]></help> 106 ]]></help>
112 <expand macro="citations"/> 107 <expand macro="citations"/>
113 </tool> 108 </tool>