annotate data2.R @ 8:17fc10a6c5fc draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit 7831a1e3225240028b30ae02808bdff9babaf368-dirty
author matthias
date Mon, 27 May 2019 05:33:20 -0400
parents 3f0bbe41052f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
1 library(dada2)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
2
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
3 # library(DBI)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
4 # library(ggplot2)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
5 library(optparse)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
6 # library(RSQLite)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
7 # library(stringr)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
8
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
9 ## source required R functions
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
10 source('user_input_functions.R')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
11
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
12 # print dada2 version
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
13 print(paste("dada2 version: ", packageVersion("dada2")))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
14
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
15 # # R function to create fasta file from dada2 output data
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
16 # outdir is directory to output fasta file
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
17 # taxa is taxonomy file generated by dada2
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
18 # prefix is string for desired prefix attached to output file names
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
19
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
20 dada2fasta <- function(outdir, seqtab.nochim, prefix){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
21 seq <- colnames(seqtab.nochim)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
22 n <- 0
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
23 ASVs <- c()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
24 for(i in seq){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
25 n <- n + 1
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
26 ASV <- paste('ASV', as.character(n), sep = '_')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
27 ASVs <- c(ASVs, ASV)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
28 line1 <- paste('>',ASV,sep='')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
29 write(line1, file.path(outdir,sprintf('%s.fasta',prefix)), append=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
30 write(i, file.path(outdir,sprintf('%s.fasta',prefix)), append=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
31 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
32 return(ASVs)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
33 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
34
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
35
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
36 # # R DADA2 workflow
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
37 # wd is path to fastq files
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
38 # r_path is path to user_input_functions.R
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
39 # outdir is path to output directory
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
40 # prefix is string for desired prefix attached to output file names
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
41
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
42 dada2run <- function(wd, r_path, outdir, prefix){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
43
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
44 # read-in files-------------------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
45 ## obtain vectors of forward and reverse reads based on 'R1' and 'R2' in file names
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
46 ## additionally obtain the coressponding sample names for these files
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
47 p1 <- c()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
48 p2 <- c()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
49 sample.names <- c()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
50 for(f in list.files(wd, full.names=T)){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
51 if(grepl('_1.fq', f)){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
52 sample <- gsub('^.*[/](.*?)_1\\.fq\\.gz', '\\1', f)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
53 sample.names <- c(sample.names, sample)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
54 p1 <- c(p1, f)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
55 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
56 if(grepl('_2.fq', f)){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
57 p2 <- c(p2, f)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
58 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
59 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
60 fnFs <- sort(p1)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
61 fnRs <- sort(p2)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
62 sample.names <- sort(sample.names)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
63
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
64 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_test.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
65 #load(file = file.path(outdir, paste0(prefix, 'state_test.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
66
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
67 ## print for review
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
68 to_view <- data.frame(sample.names, fnFs, fnRs)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
69 cat("The following fastq files were found:\n")
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
70 print(to_view)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
71
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
72 # Perform quality filtering and trimming---------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
73 ## assign new names to samples
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
74 filtFs <- file.path(outdir, paste0(sample.names, 'F_filt.fastq.gz'))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
75 filtRs <- file.path(outdir, paste0(sample.names, 'R_filt.fastq.gz'))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
76
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
77 ## plot forward and reverse quality so that user can decide on filtering parameters
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
78 cat('Plotting quality profile of forward reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
79 Fqp1 <- plotQualityProfile(fnFs[1])
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
80 #print(Fqp1)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
81 ggsave(sprintf('%s_forward_1_qualityprofile.pdf',prefix), Fqp1, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
82 #ggsave(sprintf('%s_forward_1_qualityprofile.emf',prefix), Fqp1, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
83 Fqp2 <- plotQualityProfile(fnFs[2])
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
84 #print(Fqp2)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
85 ggsave(sprintf('%s_forward_2_qualityprofile.pdf',prefix),Fqp2, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
86 #ggsave(sprintf('%s_forward_2_qualityprofile.emf',prefix), Fqp2, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
87 #cat('Which position would you like to truncate the forward reads at?\nPlease use the red-dashed lines as a guide, where they stop appearing indicates good quality.\nNOTE: Do NOT over-trim! You still require overlap between your forward and reverse reads in order to merge them later!\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
88 len1 <- 240
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
89 cat('Plotting quality profile of reverse reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
90 Rqp1 <- plotQualityProfile(fnRs[1])
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
91 #print(Rqp1)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
92 ggsave(sprintf('%s_reverse_1_qualityprofile.pdf',prefix),Rqp1, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
93 #ggsave(sprintf('%s_reverse_1_qualityprofile.emf',prefix), Rqp1, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
94 Rqp2 <- plotQualityProfile(fnRs[2])
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
95 #print(Rqp2)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
96 ggsave(sprintf('%s_reverse_2_qualityprofile.pdf',prefix), Rqp2, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
97 #ggsave(sprintf('%s_reverse_2_qualityprofile.emf',prefix), Rqp2, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
98 #cat('Which position would you like to truncate the forward reads at?\nPlease use the red-dashed lines as a guide, where they stop appearing indicates good quality.\nNOTE: Do NOT over-trim! You still require overlap between your forward and reverse reads in order to merge them later!\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
99 len2 <- 240
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
100
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
101 ## filter and trim
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
102 ## remaining parameters set to recommended defaults
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
103 ## maxN must be set to 0 (DADA2 requries no Ns)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
104 ## The maxEE parameter sets the maximum number of "expected errors" allowed in a read, which is a better filter than simply averaging quality scores.
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
105 ## If not using Windows, you may set multithread to TRUE
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
106 ## NOTE: Do not use the truncLen parameter for ITS sequencing
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
107 ## trimLeft needs to be varied based on the size of your primers (i.e. it is used to trim your primer sequences)!!
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
108 cat('Filtering and trimming sequences...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
109 out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(len1,len2), maxN=0, maxEE=c(2,2), truncQ=2, rm.phix=T, compress=T, multithread=threads, trimLeft=15)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
110
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
111 ## have user review read count changes, and relax error rate if too many reads are lost
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
112 ## for example, you may especially want to relax the number of expected errors on the reverse reads (i.e. maxEE=c(2,5)), as the reverse is prone to error on the Illumina sequencing platform
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
113 print(head(out))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
114 check2 <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
115 while(check2 == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
116 maxF <- numeric_input("What would you like the maximum number of expected errors in the forward reads to be?\nDefault 2:", 2)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
117 maxR <- numeric_input("What would you like the maximum number of expected errors in the reverse reads to be?\nDefault 5:", 5)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
118 out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(len1,len2), maxN=0, maxEE=c(maxF,maxR), truncQ=2, rm.phix=T, compress=T, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
119 print(head(out))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
120 check2 <- yn_input('Proceed? If you lost too many reads, you can choose to not proceed and you will have the option to relax the error rate. Default yes:',T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
121 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
122
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
123 # Have DADA2 learn the error rates-----------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
124 ## If not using Windows, you may set multithread to TRUE
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
125 read.subset <- 1e6
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
126 cat('Learning error rate of forward reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
127 errF <- learnErrors(filtFs, nreads=read.subset, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
128
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
129 ## have user check estimated error rates
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
130 ## note the calculations are done with a subset of the total reads, as it is computationally intensive
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
131 ## if the fit is poor, the user has the option to try again with an increased subset number of reads
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
132 Error_f <- plotErrors(errF, nominalQ = T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
133 #print(Error_f)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
134 ggsave(sprintf('%s_forward_Error_plot.pdf',prefix), Error_f, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
135 #ggsave(sprintf('%s_forward_Error_plot.emf',prefix), Error_f, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
136 check3a <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
137 while(check3a == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
138 read.subset <- numeric_input('Please specify the number of reads you would like dada2 to utilize to calculate the error rate.\nThe default previously used was 1e6.\nThe newly recommended default is 10-fold greater,\n1e7:',1e7)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
139 errF <- learnErrors(filtFs, nreads=read.subset, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
140 print(Error_f)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
141 ggsave(sprintf('%s_forward_Error_plot.pdf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
142 ggsave(sprintf('%s_forward_Error_plot.emf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
143 check3a <- yn_input('Proceed?\nThe estimated error rate (black line) should fit to the observed error rates for each consensus quality score (black points).\nAdditionally, the error rates expected under the nominal definition of the Q-value (quality score) should decrease as the quality score increases (or flat-line).\nIf you do not have a good fit, you may want dada2 to re-learn the error rates with a higher number of reads in the utilized subset.\nA subset of reads is always used as the algorithm is computationally intensive.\nDefault yes:',T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
144 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
145
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
146
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
147 ## also do for reverseL
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
148 cat('Learning error rate of reverse reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
149 errR <- learnErrors(filtRs, nreads=read.subset, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
150 Error_r <- plotErrors(errR, nominalQ=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
151 #print(Error_r)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
152 ggsave(sprintf('%s_reverse_Error_plot.pdf',prefix), Error_r, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
153 #ggsave(sprintf('%s_reverse_Error_plot.emf',prefix), Error_r, path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
154 check3b <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
155 while(check3b == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
156 read.subset <- numeric_input('Please specify the number of reads you would like dada2 to utilize to calculate the error rate.\nThe default previously used was 1e6.\nThe newly recommended default is 10-fold greater,\n1e7:',1e7)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
157 errR <- learnErrors(filtRs, nreads=read.subset, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
158 print(Error_r)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
159 ggsave(sprintf('%s_reverse_Error_plot.pdf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
160 #ggsave(sprintf('%s_reverse_Error_plot.emf',prefix), path = outdir, width = 20,height = 15,units = c("cm"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
161 check3b <- yn_input('Proceed?\nThe estimated error rate (black line) should fit to the observed error rates for each consensus quality score (black points).\nAdditionally, the error rates expected under the nominal definition of the Q-value (quality score) should decrease as the quality score increases (or flat-line).\nIf you do not have a good fit, you may want dada2 to re-learn the error rates with a higher number of reads in the utilized subset.\nA subset of reads is always used as the algorithm is computationally intensive.\nDefault yes:',T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
162 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
163
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
164 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_learning.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
165 #load(file = file.path(outdir, paste0(prefix, 'state_post_learning.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
166
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
167 # Dereplicate sequences to generate unique sequence fastq files with corresponding count tables-------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
168 ## NOTE: if your dataset is huge, you may run out of RAM. Please see https://benjjneb.github.io/dada2/bigdata.html for details.
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
169 cat('Dereplicating forward reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
170 derepFs <- derepFastq(filtFs, verbose=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
171 cat('Dereplicating reverse reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
172 derepRs <- derepFastq(filtRs, verbose=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
173
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
174 ## name the derep-class objects by sample names
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
175 names(derepFs) <- sample.names
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
176 names(derepRs) <- sample.names
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
177
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
178 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_derep.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
179 #load(file = file.path(outdir, paste0(prefix, 'state_post_derep.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
180
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
181 # Infer sequence variants using learned error rates---------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
182 ## If not using Windows, you may set multithread to TRUE
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
183 ## NOTE: if your dataset is huge, you may run out of RAM. Please see https://benjjneb.github.io/dada2/bigdata.html for details.
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
184 ## NOTE2: you can use DADA2 for 454 or IonTorrent data as well. Please see https://benjjneb.github.io/dada2/tutorial.html.
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
185 s.pool = F
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
186 cat('Inferring sequence variants of forward reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
187 dadaFs <- dada(derepFs, err=errF, pool=s.pool, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
188
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
189 ## have user inspect detected number of sequence variants, to ensure it is logical based on the biological context of their samples
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
190 ## if you have low sampling depths, you may not want to process each sample independently as per default, but set pool=T. It gives better results at increased computation time. The user will have the option to do this if the number of sequence variants doesn't make sense.
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
191 print(dadaFs[[1]])
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
192 check4 <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
193 if(check4 == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
194 s.pool = T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
195 dadaFs <- dada(derepFs, err=errF, pool=s.pool, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
196 print(dadaFs[[1]])
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
197 cat('Hopefully, these results make more sense.\nOtherwise, there is not much more you can do except start over!\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
198 check <- yn_input('Proceed? Default yes, no to quit:',T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
199 if(check == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
200 stop()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
201 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
202 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
203
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
204 ## also do for reverse, but don't need to re-check as you need to keep the pool consistent between the forward and reverse!
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
205 cat('Inferring sequence variants of reversed reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
206 dadaRs <- dada(derepRs, err=errR, pool=s.pool, multithread=threads)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
207
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
208 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_dada.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
209 #load(file = file.path(outdir, paste0(prefix, 'state_post_dada.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
210
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
211
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
212 # Merge forward and reverse reads-------------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
213 cat('Merging paired-end reads...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
214 mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
215 #cat('Most of your reads should have been retained (i.e. were able to merge, see above).\nOtherwise, there is not much more you can do except start over (i.e. did you over-trim your sequences??)!\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
216 check <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
217 if(check == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
218 stop()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
219 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
220
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
221 # Construct sequences table-------------------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
222 cat('Constructing sequence table...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
223 seqtab <- makeSequenceTable(mergers)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
224
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
225 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_merge.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
226 #load(file = file.path(outdir, paste0(prefix, 'state_post_merge.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
227
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
228
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
229 ## inspect distribution of sequence lengths
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
230 ## give user the option to filter out overly long or short sequneces
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
231 cat('Sequence length distribution listed below:\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
232 print(table(nchar(getSequences(seqtab))))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
233 check5 <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
234 if(check5 == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
235 min.cutoff <- numeric_input('Please input desired minimum length of sequences:',NULL)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
236 max.cutoff <- numeric_input('Please input desired maximum length of sequences:',NULL)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
237 seqtab <- seqtab[,nchar(colnames(seqtab)) %in% seq(min.cutoff,max.cutoff)]
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
238 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
239
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
240 # Remove chimeras------------------------------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
241 ## If not using Windows, you may set multithread to TRUE
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
242 cat('Removing chimeras...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
243 seqtab.nochim <- removeBimeraDenovo(seqtab, method='consensus', multithread=threads, verbose=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
244
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
245 ## display percentage of chimeras removed
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
246 ## this number should be small (<5%), otherwise some processing parameters need to be revisited
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
247 percent.nochim <- (sum(seqtab.nochim)/sum(seqtab))*100
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
248 percent.nochim <- paste(as.character(percent.nochim),'of reads retained after chimera removal.\n',sep=' ')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
249 cat(percent.nochim)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
250 #cat('Most of your reads should have been retained.\nOtherwise, there is not much more you can do except start over!\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
251 check <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
252 if(check == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
253 stop()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
254 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
255
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
256 # Final sanity check--------------------------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
257 ## track reads removed throughout the pipeline
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
258 ## If processing a single sample, remove the sapply calls: e.g. replace sapply(dadaFs, getN) with getN(dadaFs)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
259 getN <- function(x) sum(getUniques(x))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
260 track <- cbind(out, sapply(dadaFs, getN), sapply(mergers, getN), rowSums(seqtab), rowSums(seqtab.nochim))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
261 colnames(track) <- c("input", "filtered", "denoised", "merged", "tabled", "nonchim")
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
262 rownames(track) <- sample.names
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
263 print(head(track))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
264 #cat('Most of your reads should have been retained.\nOtherwise, there is not much more you can do except start over!\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
265 check <- T
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
266 if(check == F){
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
267 stop()
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
268 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
269
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
270 write.csv(track,file=file.path(outdir, sprintf('%s_read_count-quality_control.csv',prefix)))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
271
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
272 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_chimera.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
273 # load(file = file.path(outdir, paste0(prefix, 'state_post_chimera.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
274
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
275 # Assign taxonomy-----------------------------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
276 ## require silva database files
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
277 ## If not using Windows, you may set multithread to TRUE
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
278 ## Minimum boot strap should be 80, but for sequnce length =< 250 Minimum bootstrap set to 50 (which is also the default)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
279
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
280 ## SILVA
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
281 cat('Assigning taxonomy to genus level using SILVA...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
282 taxa_silva <- assignTaxonomy(seqtab.nochim, file.path(wd,"silva_nr_v132_train_set.fa.gz"), multithread=threads, minBoot=80, tryRC=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
283 cat('Assigning taxonomy at species level using SILVA...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
284 taxa_silva <- addSpecies(taxa_silva, file.path(wd,"silva_species_assignment_v132.fa.gz"), allowMultiple=T, tryRC=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
285 write.csv(taxa_silva,file=file.path(outdir, sprintf('%s_taxonomy_silva.csv',prefix)))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
286
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
287 ## RDP - used for copy number correction
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
288 cat('Assigning taxonomy to genus level using RDP...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
289 taxa_rdp <- assignTaxonomy(seqtab.nochim, file.path(wd,"rdp_train_set_16.fa.gz"), multithread=threads, minBoot=80, tryRC=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
290 cat('Assigning taxonomy at species level using RDP...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
291 taxa_rdp <- addSpecies(taxa_rdp, file.path(wd,"rdp_species_assignment_16.fa.gz"), allowMultiple=T, tryRC=T)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
292 write.csv(taxa_rdp,file=file.path(outdir, sprintf('%s_taxonomy_rdp.csv',prefix)))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
293 save(list = ls(all.names = TRUE), file = file.path(outdir, paste0(prefix, 'state_post_tax.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
294 #load(file = file.path(outdir, paste0(prefix, 'state_post_tax.Rdata')))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
295
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
296
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
297 # Return data----------------------------------------------------------------------
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
298 cat('Returning data...\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
299 ## create fasta file
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
300 ASVs <- dada2fasta(outdir, seqtab.nochim, prefix)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
301 ## create master dataframe for each classification
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
302
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
303 ## Assigning ASVs to count table
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
304 sequences <- colnames(seqtab.nochim)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
305 colnames(seqtab.nochim) <- ASVs
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
306 seqtab.nochim <- t(seqtab.nochim)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
307
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
308 ## silva
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
309 taxa_silva <- taxa_silva[match(sequences, rownames(taxa_silva)),]
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
310 rownames(taxa_silva) <- ASVs
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
311 d <- merge(seqtab.nochim, taxa_silva, by='row.names')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
312 colnames(d)[1] <- 'ASV'
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
313 ## create database of all information
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
314 db <- dbConnect(RSQLite::SQLite(), file.path(outdir, sprintf('%s.sqlite',prefix)))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
315 dbWriteTable(db, 'dada2_results_silva', d)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
316 ## write master dataframe for user, and return it
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
317 write.table(d, file.path(outdir, sprintf('%s_dada2_results_silva.txt',prefix)), sep='\t', quote=F, row.names=F)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
318
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
319 ## rdp
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
320 taxa_rdp <- taxa_rdp[match(sequences, rownames(taxa_rdp)),]
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
321 rownames(taxa_rdp) <- ASVs
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
322 d <- merge(seqtab.nochim, taxa_rdp, by='row.names')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
323 colnames(d)[1] <- 'ASV'
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
324 ## create database of all information
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
325 dbWriteTable(db, 'dada2_results_rdp', d)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
326 dbDisconnect(db)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
327 ## write master dataframe for user, and return it
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
328 write.table(d, file.path(outdir, sprintf('%s_dada2_results_rdp.txt',prefix)), sep='\t', quote=F, row.names=F)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
329 return(d)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
330
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
331 cat('DADA2 processing completed!\n')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
332 }
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
333
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
334
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
335 option_list = list(
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
336 make_option(c("-t", "--threads"), type = "integer", default = 1,
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
337 help = "number of threads to use", metavar = "THREADS")
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
338 );
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
339
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
340 opt_parser = OptionParser(option_list=option_list);
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
341 opt = parse_args(opt_parser);
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
342
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
343 print(opt)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
344
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
345 threads = as.integer(Sys.getenv("NSLOTS", "1"))
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
346
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
347 exit(1)
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
348
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
349
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
350
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
351 dada2run(wd='/work/haange/Leaky_gut/', r_path='/work/haange/Leaky_gut', outdir='/work/haange/Leaky_gut/results', prefix='Leaky_gut')
3f0bbe41052f planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/topic/dada2/tools/dada2 commit d63c84012410608b3b5d23e130f0beff475ce1f8-dirty
matthias
parents:
diff changeset
352