annotate data2.R @ 1:f2abdc9a849c draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
author matthias
date Fri, 08 Mar 2019 08:43:09 -0500
parents 98e24c66eeb2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
1 library(dada2)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
2
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
3 # library(DBI)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
4 # library(ggplot2)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
5 library(optparse)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
6 # library(RSQLite)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
7 # library(stringr)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
8
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
9 ## source required R functions
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
10 source('user_input_functions.R')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
11
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
12 # print dada2 version
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
13 print(paste("dada2 version: ", packageVersion("dada2")))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
14
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
15 # # R function to create fasta file from dada2 output data
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
16 # outdir is directory to output fasta file
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
17 # taxa is taxonomy file generated by dada2
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
18 # prefix is string for desired prefix attached to output file names
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
19
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
20 dada2fasta <- function(outdir, seqtab.nochim, prefix){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
21 seq <- colnames(seqtab.nochim)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
22 n <- 0
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
23 ASVs <- c()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
24 for(i in seq){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
25 n <- n + 1
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
26 ASV <- paste('ASV', as.character(n), sep = '_')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
27 ASVs <- c(ASVs, ASV)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
28 line1 <- paste('>',ASV,sep='')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
29 write(line1, file.path(outdir,sprintf('%s.fasta',prefix)), append=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
30 write(i, file.path(outdir,sprintf('%s.fasta',prefix)), append=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
31 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
32 return(ASVs)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
33 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
34
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
35
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
36 # # R DADA2 workflow
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
37 # wd is path to fastq files
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
38 # r_path is path to user_input_functions.R
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
39 # outdir is path to output directory
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
40 # prefix is string for desired prefix attached to output file names
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
41
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
42 dada2run <- function(wd, r_path, outdir, prefix){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
43
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
44 # read-in files-------------------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
45 ## obtain vectors of forward and reverse reads based on 'R1' and 'R2' in file names
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
46 ## additionally obtain the coressponding sample names for these files
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
47 p1 <- c()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
48 p2 <- c()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
49 sample.names <- c()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
50 for(f in list.files(wd, full.names=T)){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
51 if(grepl('_1.fq', f)){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
52 sample <- gsub('^.*[/](.*?)_1\\.fq\\.gz', '\\1', f)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
53 sample.names <- c(sample.names, sample)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
54 p1 <- c(p1, f)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
55 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
56 if(grepl('_2.fq', f)){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
57 p2 <- c(p2, f)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
58 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
59 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
60 fnFs <- sort(p1)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
61 fnRs <- sort(p2)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
62 sample.names <- sort(sample.names)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
63
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
64 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_test.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
65 #load(file = file.path(outdir, paste0(prefix, 'state_test.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
66
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
67 ## print for review
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
68 to_view <- data.frame(sample.names, fnFs, fnRs)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
69 cat("The following fastq files were found:\n")
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
70 print(to_view)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
71
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
72 # Perform quality filtering and trimming---------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
73 ## assign new names to samples
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
74 filtFs <- file.path(outdir, paste0(sample.names, 'F_filt.fastq.gz'))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
75 filtRs <- file.path(outdir, paste0(sample.names, 'R_filt.fastq.gz'))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
76
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
77 ## plot forward and reverse quality so that user can decide on filtering parameters
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
78 cat('Plotting quality profile of forward reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
79 Fqp1 <- plotQualityProfile(fnFs[1])
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
80 #print(Fqp1)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
81 ggsave(sprintf('%s_forward_1_qualityprofile.pdf',prefix), Fqp1, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
82 #ggsave(sprintf('%s_forward_1_qualityprofile.emf',prefix), Fqp1, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
83 Fqp2 <- plotQualityProfile(fnFs[2])
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
84 #print(Fqp2)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
85 ggsave(sprintf('%s_forward_2_qualityprofile.pdf',prefix),Fqp2, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
86 #ggsave(sprintf('%s_forward_2_qualityprofile.emf',prefix), Fqp2, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
87 #cat('Which position would you like to truncate the forward reads at?\nPlease use the red-dashed lines as a guide, where they stop appearing indicates good quality.\nNOTE: Do NOT over-trim! You still require overlap between your forward and reverse reads in order to merge them later!\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
88 len1 <- 240
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
89 cat('Plotting quality profile of reverse reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
90 Rqp1 <- plotQualityProfile(fnRs[1])
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
91 #print(Rqp1)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
92 ggsave(sprintf('%s_reverse_1_qualityprofile.pdf',prefix),Rqp1, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
93 #ggsave(sprintf('%s_reverse_1_qualityprofile.emf',prefix), Rqp1, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
94 Rqp2 <- plotQualityProfile(fnRs[2])
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
95 #print(Rqp2)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
96 ggsave(sprintf('%s_reverse_2_qualityprofile.pdf',prefix), Rqp2, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
97 #ggsave(sprintf('%s_reverse_2_qualityprofile.emf',prefix), Rqp2, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
98 #cat('Which position would you like to truncate the forward reads at?\nPlease use the red-dashed lines as a guide, where they stop appearing indicates good quality.\nNOTE: Do NOT over-trim! You still require overlap between your forward and reverse reads in order to merge them later!\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
99 len2 <- 240
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
100
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
101 ## filter and trim
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
102 ## remaining parameters set to recommended defaults
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
103 ## maxN must be set to 0 (DADA2 requries no Ns)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
104 ## The maxEE parameter sets the maximum number of "expected errors" allowed in a read, which is a better filter than simply averaging quality scores.
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
105 ## If not using Windows, you may set multithread to TRUE
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
106 ## NOTE: Do not use the truncLen parameter for ITS sequencing
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
107 ## trimLeft needs to be varied based on the size of your primers (i.e. it is used to trim your primer sequences)!!
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
108 cat('Filtering and trimming sequences...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
109 out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(len1,len2), maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=T, compress=T, multithread=threads, trimLeft=15)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
110
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
111 ## have user review read count changes, and relax error rate if too many reads are lost
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
112 ## for example, you may especially want to relax the number of expected errors on the reverse reads (i.e. maxEE=c(2,5)), as the reverse is prone to error on the Illumina sequencing platform
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
113 print(head(out))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
114 check2 <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
115 while(check2 == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
116 maxF <- numeric_input("What would you like the maximum number of expected errors in the forward reads to be?\nDefault 2:", 2)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
117 maxR <- numeric_input("What would you like the maximum number of expected errors in the reverse reads to be?\nDefault 5:", 5)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
118 out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(len1,len2), maxN=0, maxEE=c(maxF,maxR), truncQ=2, rm.phix=T, compress=T, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
119 print(head(out))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
120 check2 <- yn_input('Proceed? If you lost too many reads, you can choose to not proceed and you will have the option to relax the error rate. Default yes:',T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
121 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
122
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
123 # Have DADA2 learn the error rates-----------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
124 ## If not using Windows, you may set multithread to TRUE
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
125 read.subset <- 1e6
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
126 cat('Learning error rate of forward reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
127 errF <- learnErrors(filtFs, nreads=read.subset, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
128
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
129 ## have user check estimated error rates
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
130 ## note the calculations are done with a subset of the total reads, as it is computationally intensive
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
131 ## if the fit is poor, the user has the option to try again with an increased subset number of reads
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
132 Error_f <- plotErrors(errF, nominalQ = T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
133 #print(Error_f)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
134 ggsave(sprintf('%s_forward_Error_plot.pdf',prefix), Error_f, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
135 #ggsave(sprintf('%s_forward_Error_plot.emf',prefix), Error_f, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
136 check3a <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
137 while(check3a == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
138 read.subset <- numeric_input('Please specify the number of reads you would like dada2 to utilize to calculate the error rate.\nThe default previously used was 1e6.\nThe newly recommended default is 10-fold greater,\n1e7:',1e7)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
139 errF <- learnErrors(filtFs, nreads=read.subset, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
140 print(Error_f)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
141 ggsave(sprintf('%s_forward_Error_plot.pdf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
142 ggsave(sprintf('%s_forward_Error_plot.emf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
143 check3a <- yn_input('Proceed?\nThe estimated error rate (black line) should fit to the observed error rates for each consensus quality score (black points).\nAdditionally, the error rates expected under the nominal definition of the Q-value (quality score) should decrease as the quality score increases (or flat-line).\nIf you do not have a good fit, you may want dada2 to re-learn the error rates with a higher number of reads in the utilized subset.\nA subset of reads is always used as the algorithm is computationally intensive.\nDefault yes:',T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
144 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
145
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
146
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
147 ## also do for reverseL
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
148 cat('Learning error rate of reverse reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
149 errR <- learnErrors(filtRs, nreads=read.subset, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
150 Error_r <- plotErrors(errR, nominalQ=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
151 #print(Error_r)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
152 ggsave(sprintf('%s_reverse_Error_plot.pdf',prefix), Error_r, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
153 #ggsave(sprintf('%s_reverse_Error_plot.emf',prefix), Error_r, path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
154 check3b <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
155 while(check3b == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
156 read.subset <- numeric_input('Please specify the number of reads you would like dada2 to utilize to calculate the error rate.\nThe default previously used was 1e6.\nThe newly recommended default is 10-fold greater,\n1e7:',1e7)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
157 errR <- learnErrors(filtRs, nreads=read.subset, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
158 print(Error_r)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
159 ggsave(sprintf('%s_reverse_Error_plot.pdf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
160 #ggsave(sprintf('%s_reverse_Error_plot.emf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
161 check3b <- yn_input('Proceed?\nThe estimated error rate (black line) should fit to the observed error rates for each consensus quality score (black points).\nAdditionally, the error rates expected under the nominal definition of the Q-value (quality score) should decrease as the quality score increases (or flat-line).\nIf you do not have a good fit, you may want dada2 to re-learn the error rates with a higher number of reads in the utilized subset.\nA subset of reads is always used as the algorithm is computationally intensive.\nDefault yes:',T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
162 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
163
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
164 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_learning.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
165 #load(file = file.path(outdir, paste0(prefix, 'state_post_learning.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
166
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
167 # Dereplicate sequences to generate unique sequence fastq files with corresponding count tables-------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
168 ## NOTE: if your dataset is huge, you may run out of RAM. Please see https://benjjneb.github.io/dada2/bigdata.html for details.
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
169 cat('Dereplicating forward reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
170 derepFs <- derepFastq(filtFs, verbose=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
171 cat('Dereplicating reverse reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
172 derepRs <- derepFastq(filtRs, verbose=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
173
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
174 ## name the derep-class objects by sample names
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
175 names(derepFs) <- sample.names
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
176 names(derepRs) <- sample.names
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
177
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
178 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_derep.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
179 #load(file = file.path(outdir, paste0(prefix, 'state_post_derep.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
180
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
181 # Infer sequence variants using learned error rates---------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
182 ## If not using Windows, you may set multithread to TRUE
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
183 ## NOTE: if your dataset is huge, you may run out of RAM. Please see https://benjjneb.github.io/dada2/bigdata.html for details.
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
184 ## NOTE2: you can use DADA2 for 454 or IonTorrent data as well. Please see https://benjjneb.github.io/dada2/tutorial.html.
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
185 s.pool = F
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
186 cat('Inferring sequence variants of forward reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
187 dadaFs <- dada(derepFs, err=errF, pool=s.pool, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
188
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
189 ## have user inspect detected number of sequence variants, to ensure it is logical based on the biological context of their samples
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
190 ## if you have low sampling depths, you may not want to process each sample independently as per default, but set pool=T. It gives better results at increased computation time. The user will have the option to do this if the number of sequence variants doesn't make sense.
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
191 print(dadaFs[[1]])
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
192 check4 <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
193 if(check4 == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
194 s.pool = T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
195 dadaFs <- dada(derepFs, err=errF, pool=s.pool, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
196 print(dadaFs[[1]])
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
197 cat('Hopefully, these results make more sense.\nOtherwise, there is not much more you can do except start over!\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
198 check <- yn_input('Proceed? Default yes, no to quit:',T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
199 if(check == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
200 stop()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
201 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
202 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
203
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
204 ## also do for reverse, but don't need to re-check as you need to keep the pool consistent between the forward and reverse!
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
205 cat('Inferring sequence variants of reversed reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
206 dadaRs <- dada(derepRs, err=errR, pool=s.pool, multithread=threads)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
207
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
208 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_dada.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
209 #load(file = file.path(outdir, paste0(prefix, 'state_post_dada.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
210
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
211
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
212 # Merge forward and reverse reads-------------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
213 cat('Merging paired-end reads...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
214 mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
215 #cat('Most of your reads should have been retained (i.e. were able to merge, see above).\nOtherwise, there is not much more you can do except start over (i.e. did you over-trim your sequences??)!\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
216 check <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
217 if(check == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
218 stop()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
219 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
220
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
221 # Construct sequences table-------------------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
222 cat('Constructing sequence table...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
223 seqtab <- makeSequenceTable(mergers)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
224
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
225 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_merge.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
226 #load(file = file.path(outdir, paste0(prefix, 'state_post_merge.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
227
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
228
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
229 ## inspect distribution of sequence lengths
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
230 ## give user the option to filter out overly long or short sequneces
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
231 cat('Sequence length distribution listed below:\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
232 print(table(nchar(getSequences(seqtab))))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
233 check5 <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
234 if(check5 == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
235 min.cutoff <- numeric_input('Please input desired minimum length of sequences:',NULL)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
236 max.cutoff <- numeric_input('Please input desired maximum length of sequences:',NULL)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
237 seqtab <- seqtab[,nchar(colnames(seqtab)) %in% seq(min.cutoff,max.cutoff)]
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
238 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
239
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
240 # Remove chimeras------------------------------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
241 ## If not using Windows, you may set multithread to TRUE
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
242 cat('Removing chimeras...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
243 seqtab.nochim <- removeBimeraDenovo(seqtab, method='consensus', multithread=threads, verbose=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
244
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
245 ## display percentage of chimeras removed
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
246 ## this number should be small (<5%), otherwise some processing parameters need to be revisited
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
247 percent.nochim <- (sum(seqtab.nochim)/sum(seqtab))*100
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
248 percent.nochim <- paste(as.character(percent.nochim),'of reads retained after chimera removal.\n',sep=' ')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
249 cat(percent.nochim)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
250 #cat('Most of your reads should have been retained.\nOtherwise, there is not much more you can do except start over!\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
251 check <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
252 if(check == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
253 stop()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
254 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
255
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
256 # Final sanity check--------------------------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
257 ## track reads removed throughout the pipeline
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
258 ## If processing a single sample, remove the sapply calls: e.g. replace sapply(dadaFs, getN) with getN(dadaFs)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
259 getN <- function(x) sum(getUniques(x))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
260 track <- cbind(out, sapply(dadaFs, getN), sapply(mergers, getN), rowSums(seqtab), rowSums(seqtab.nochim))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
261 colnames(track) <- c("input", "filtered", "denoised", "merged", "tabled", "nonchim")
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
262 rownames(track) <- sample.names
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
263 print(head(track))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
264 #cat('Most of your reads should have been retained.\nOtherwise, there is not much more you can do except start over!\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
265 check <- T
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
266 if(check == F){
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
267 stop()
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
268 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
269
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
270 write.csv(track,file=file.path(outdir, sprintf('%s_read_count-quality_control.csv',prefix)))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
271
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
272 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_chimera.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
273 # load(file = file.path(outdir, paste0(prefix, 'state_post_chimera.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
274
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
275 # Assign taxonomy-----------------------------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
276 ## require silva database files
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
277 ## If not using Windows, you may set multithread to TRUE
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
278 ## Minimum boot strap should be 80, but for sequnce length =< 250 Minimum bootstrap set to 50 (which is also the default)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
279
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
280 ## SILVA
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
281 cat('Assigning taxonomy to genus level using SILVA...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
282 taxa_silva <- assignTaxonomy(seqtab.nochim, file.path(wd,"silva_nr_v132_train_set.fa.gz"), multithread=threads, minBoot=80, tryRC=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
283 cat('Assigning taxonomy at species level using SILVA...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
284 taxa_silva <- addSpecies(taxa_silva, file.path(wd,"silva_species_assignment_v132.fa.gz"), allowMultiple=T, tryRC=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
285 write.csv(taxa_silva,file=file.path(outdir, sprintf('%s_taxonomy_silva.csv',prefix)))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
286
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
287 ## RDP - used for copy number correction
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
288 cat('Assigning taxonomy to genus level using RDP...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
289 taxa_rdp <- assignTaxonomy(seqtab.nochim, file.path(wd,"rdp_train_set_16.fa.gz"), multithread=threads, minBoot=80, tryRC=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
290 cat('Assigning taxonomy at species level using RDP...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
291 taxa_rdp <- addSpecies(taxa_rdp, file.path(wd,"rdp_species_assignment_16.fa.gz"), allowMultiple=T, tryRC=T)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
292 write.csv(taxa_rdp,file=file.path(outdir, sprintf('%s_taxonomy_rdp.csv',prefix)))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
293 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_tax.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
294 #load(file = file.path(outdir, paste0(prefix, 'state_post_tax.Rdata')))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
295
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
296
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
297 # Return data----------------------------------------------------------------------
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
298 cat('Returning data...\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
299 ## create fasta file
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
300 ASVs <- dada2fasta(outdir, seqtab.nochim, prefix)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
301 ## create master dataframe for each classification
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
302
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
303 ## Assigning ASVs to count table
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
304 sequences <- colnames(seqtab.nochim)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
305 colnames(seqtab.nochim) <- ASVs
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
306 seqtab.nochim <- t(seqtab.nochim)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
307
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
308 ## silva
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
309 taxa_silva <- taxa_silva[match(sequences, rownames(taxa_silva)),]
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
310 rownames(taxa_silva) <- ASVs
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
311 d <- merge(seqtab.nochim, taxa_silva, by='row.names')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
312 colnames(d)[1] <- 'ASV'
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
313 ## create database of all information
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
314 db <- dbConnect(RSQLite::SQLite(), file.path(outdir, sprintf('%s.sqlite',prefix)))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
315 dbWriteTable(db, 'dada2_results_silva', d)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
316 ## write master dataframe for user, and return it
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
317 write.table(d, file.path(outdir, sprintf('%s_dada2_results_silva.txt',prefix)), sep='\t', quote=F, row.names=F)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
318
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
319 ## rdp
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
320 taxa_rdp <- taxa_rdp[match(sequences, rownames(taxa_rdp)),]
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
321 rownames(taxa_rdp) <- ASVs
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
322 d <- merge(seqtab.nochim, taxa_rdp, by='row.names')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
323 colnames(d)[1] <- 'ASV'
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
324 ## create database of all information
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
325 dbWriteTable(db, 'dada2_results_rdp', d)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
326 dbDisconnect(db)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
327 ## write master dataframe for user, and return it
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
328 write.table(d, file.path(outdir, sprintf('%s_dada2_results_rdp.txt',prefix)), sep='\t', quote=F, row.names=F)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
329 return(d)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
330
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
331 cat('DADA2 processing completed!\n')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
332 }
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
333
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
334
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
335 option_list = list(
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
336 make_option(c("-t", "--threads"), type = "integer", default = 1,
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
337 help = "number of threads to use", metavar = "THREADS")
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
338 );
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
339
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
340 opt_parser = OptionParser(option_list=option_list);
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
341 opt = parse_args(opt_parser);
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
342
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
343 print(opt)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
344
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
345 threads = as.integer(Sys.getenv("NSLOTS", "1"))
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
346
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
347 exit(1)
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
348
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
349
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
350
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
351 dada2run(wd='/work/haange/Leaky_gut/', r_path='/work/haange/Leaky_gut', outdir='/work/haange/Leaky_gut/results', prefix='Leaky_gut')
98e24c66eeb2 planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
352