|
1
|
1 # ARGS: 1.inputType -String specifying format of input (fastq or table)
|
|
|
2 # IF inputType is "fastQ":
|
|
|
3 # 2*.fastqPath -One or more strings specifying path to fastq files
|
|
|
4 # 2.annoPath -String specifying path to hairpin annotation table
|
|
|
5 # 3.samplePath -String specifying path to sample annotation table
|
|
|
6 # 4.barStart -Integer specifying starting position of barcode
|
|
|
7 # 5.barEnd -Integer specifying ending position of barcode
|
|
|
8 # 6.hpStart -Integer specifying starting position of hairpin
|
|
|
9 # unique region
|
|
|
10 # 7.hpEnd -Integer specifying ending position of hairpin
|
|
|
11 # unique region
|
|
|
12 # ###
|
|
|
13 # IF inputType is "counts":
|
|
|
14 # 2.countPath -String specifying path to count table
|
|
|
15 # 3.annoPath -String specifying path to hairpin annotation table
|
|
|
16 # 4.samplePath -String specifying path to sample annotation table
|
|
|
17 # ###
|
|
|
18 # 8.cpmReq -Float specifying cpm requirement
|
|
|
19 # 9.sampleReq -Integer specifying number of samples that must exceed the cpm requirement
|
|
|
20 # 10.fdrThresh -Float specifying the FDR requirement
|
|
|
21 # 11.lfcThresh -Float specifying the log-fold-change requirement
|
|
|
22 # 12.workMode -String specifying exact test or GLM usage
|
|
|
23 # 13.htmlPath -String specifying path to HTML file
|
|
|
24 # 14.folderPath -String specifying path to folder for output
|
|
|
25 # IF workMode is "classic" (exact test)
|
|
|
26 # 15.pairData[2] -String specifying first group for exact test
|
|
|
27 # 16.pairData[1] -String specifying second group for exact test
|
|
|
28 # ###
|
|
|
29 # IF workMode is "glm"
|
|
|
30 # 15.contrastData -String specifying contrasts to be made
|
|
|
31 # 16.roastOpt -String specifying usage of gene-wise tests
|
|
|
32 # 17.hairpinReq -String specifying hairpin requirement for gene-
|
|
|
33 # wise test
|
|
|
34 # 18.selectOpt -String specifying type of selection for barcode
|
|
|
35 # plots
|
|
|
36 # 19.selectVals -String specifying members selected for barcode
|
|
|
37 # plots
|
|
|
38 #
|
|
|
39 # OUT: Bar Plot of Counts Per Index
|
|
|
40 # Bar Plot of Counts Per Hairpin
|
|
|
41 # MDS Plot
|
|
|
42 # Smear Plot
|
|
|
43 # Barcode Plots (If Genewise testing was selected)
|
|
|
44 # Top Expression Table
|
|
|
45 # HTML file linking to the outputs
|
|
|
46 #
|
|
|
47 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
|
|
|
48
|
|
0
|
49 # Record starting time
|
|
|
50 timeStart <- as.character(Sys.time())
|
|
|
51
|
|
|
52 # Loading and checking required packages
|
|
|
53 library(methods, quietly=TRUE, warn.conflicts=FALSE)
|
|
|
54 library(statmod, quietly=TRUE, warn.conflicts=FALSE)
|
|
|
55 library(splines, quietly=TRUE, warn.conflicts=FALSE)
|
|
|
56 library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
|
|
|
57 library(limma, quietly=TRUE, warn.conflicts=FALSE)
|
|
|
58
|
|
|
# Advise (without aborting) when the installed edgeR is older than the
# minimum release this script asks for.
minEdgeR <- "3.5.23"
if (packageVersion("edgeR") < minEdgeR) {
  message("Please update 'edgeR' to version >= ", minEdgeR,
          " to run this script")
}
|
|
|
62
|
|
|
63 ################################################################################
|
|
|
64 ### Function declarations
|
|
|
65 ################################################################################
|
|
|
66
|
|
|
# Tidy a contrast equation: strip the spaces on either side of the
# arithmetic operators (+, -, /, *) and trim leading/trailing whitespace.
# Works element-wise on a character vector.
sanitiseEquation <- function(equation) {
  # One pass handles all four operators; "\\1" puts the operator back
  # without its surrounding spaces
  tightened <- gsub(" *([-+/*]) *", "\\1", equation)
  gsub("^\\s+|\\s+$", "", tightened)
}
|
|
|
77
|
|
|
# Tidy a comma-separated group string: remove the spaces around each comma,
# then trim leading/trailing whitespace.
sanitiseGroups <- function(string) {
  gsub("^\\s+|\\s+$", "", gsub(" *[,] *", ",", string))
}
|
|
|
84
|
|
|
# Replace every literal period in a string with a space (used to make
# make.names()-style identifiers readable again in messages).
unmake.names <- function(string) {
  gsub(".", " ", string, fixed = TRUE)
}
|
|
|
90
|
|
|
# Build the path of an output file inside the output folder.
# Reads the global `folderPath`; vectorised over `filename`.
makeOut <- function(filename) {
  paste(folderPath, filename, sep = "/")
}
|
|
|
95
|
|
|
# For a plot base name, define two global variables holding its output
# paths: <filename>Png -> <folder>/<filename>.png and
#        <filename>Pdf -> <folder>/<filename>.pdf
imgOut <- function(filename) {
  pngPath <- makeOut(paste0(filename, ".png"))
  pdfPath <- makeOut(paste0(filename, ".pdf"))
  assign(paste0(filename, "Png"), pngPath, envir = .GlobalEnv)
  assign(paste0(filename, "Pdf"), pdfPath, envir = .GlobalEnv)
}
|
|
|
103
|
|
|
# cat() with defaults suited to building the HTML report: output goes to
# `htmlPath`, no separator is inserted between arguments, and text is
# appended to the file rather than overwriting it.
#
# Arguments are those of base::cat(). `file` defaults to the global
# `htmlPath`, which is only looked up when cata() is actually called.
#
# Delegates to cat() instead of calling .Internal(cat(...)) directly, so the
# helper does not depend on R's private internals; connection handling
# (including closing the file on exit) is done by cat() itself.
cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL,
                 append = TRUE) {
  cat(..., file = file, sep = sep, fill = fill, labels = labels,
      append = append)
}
|
|
|
122
|
|
|
# Write the HTML <head> section containing only a <title> element
HtmlHead <- function(title) {
  cata("<head>\n",
       "<title>", title, "</title>\n",
       "</head>\n")
}
|
|
|
129
|
|
|
# Write an HTML hyperlink that opens in a new tab, followed by a line break.
# `label` is the visible text and defaults to the address itself.
HtmlLink <- function(address, label=address) {
  cata("<a href=\"", address, "\" target=\"_blank\">")
  cata(label, "</a><br />\n")
}
|
|
|
134
|
|
|
# Write an HTML <img> tag. `label` becomes the alt text (defaults to the
# image source); `height` and `width` default to 600.
HtmlImage <- function(source, label=source, height=600, width=600) {
  cata("<img src=\"", source,
       "\" alt=\"", label,
       "\" height=\"", height,
       "\" width=\"", width, "\"/>\n")
}
|
|
|
140
|
|
|
# Shared writer for the element helpers below: emits <tag>, then the
# supplied content, then </tag> and a newline, all through cata().
.htmlElement <- function(tag, ...) {
  cata(paste0("<", tag, ">"), ..., paste0("</", tag, ">\n"))
}

# Write the arguments as an HTML list item (<li>)
ListItem <- function(...) {
  .htmlElement("li", ...)
}

# Write the arguments as an HTML table cell (<td>)
TableItem <- function(...) {
  .htmlElement("td", ...)
}

# Write the arguments as an HTML table header cell (<th>)
TableHeadItem <- function(...) {
  .htmlElement("th", ...)
}
|
|
|
153 ################################################################################
|
|
|
154 ### Input Processing
|
|
|
155 ################################################################################
|
|
|
156
|
|
|
# Grab the trailing command-line arguments. Their layout is described in
# the header comment at the top of this script.
argv <- commandArgs(TRUE)

inputType <- as.character(argv[1])
if (is.na(inputType)) {
  stop("no arguments supplied: the first argument must be the input type ",
       "('fastq' or 'counts')", call. = FALSE)
}

if (inputType == "fastq") {
  # fastq paths arrive tagged with a "fastq::" prefix and there may be any
  # number of them: collect them, then remove them from the argument vector
  # so the remaining arguments sit at fixed positions.
  isFastq <- grepl("fastq::", argv, fixed = TRUE)
  fastqPath <- as.character(gsub("fastq::", "", argv[isFastq], fixed = TRUE))
  argv <- argv[!isFastq]

  annoPath <- as.character(argv[2])
  samplePath <- as.character(argv[3])
  barStart <- as.numeric(argv[4])
  barEnd <- as.numeric(argv[5])
  hpStart <- as.numeric(argv[6])
  hpEnd <- as.numeric(argv[7])
} else if (inputType == "counts") {
  countPath <- as.character(argv[2])
  annoPath <- as.character(argv[3])
  samplePath <- as.character(argv[4])
} else {
  # Fail here with a clear message instead of later with an
  # "object not found" error when the input paths are first used
  stop("unrecognised input type '", inputType,
       "': expected 'fastq' or 'counts'", call. = FALSE)
}

# Arguments shared by both input types (read from positions 8 onwards
# whichever input type was given)
cpmReq <- as.numeric(argv[8])
sampleReq <- as.numeric(argv[9])
fdrThresh <- as.numeric(argv[10])
lfcThresh <- as.numeric(argv[11])
workMode <- as.character(argv[12])
htmlPath <- as.character(argv[13])
folderPath <- as.character(argv[14])

if (workMode == "classic") {
  # Exact test: note the two groups are stored in reverse argument order
  pairData <- character()
  pairData[2] <- as.character(argv[15])
  pairData[1] <- as.character(argv[16])
} else if (workMode == "glm") {
  contrastData <- as.character(argv[15])
  roastOpt <- as.character(argv[16])
  hairpinReq <- as.numeric(argv[17])
  selectOpt <- as.character(argv[18])
  selectVals <- as.character(argv[19])
}
|
|
|
196
|
|
|
# Read in the tab-separated input tables
samples <- read.table(samplePath, header=TRUE, sep="\t")
anno <- read.table(annoPath, header=TRUE, sep="\t")
if (inputType == "counts") {
  counts <- read.table(countPath, header=TRUE, sep="\t")
}

###################### Check inputs for correctness ############################

# Values that occur more than once in `ids`, in table() (sorted) order;
# an empty result means every value is unique.
repeatedIDs <- function(ids) {
  tab <- table(ids)
  names(tab)[tab > 1]
}

# Make sample IDs syntactically valid names (e.g. spaces become periods)
samples$ID <- make.names(samples$ID)

# Check if grouping variable has been specified
if (!any(grepl("group", names(samples)))) {
  stop("'group' column not specified in sample annotation file")
}

# Check that IDs in sample annotation are unique
offenders <- repeatedIDs(samples$ID)
if (length(offenders) > 0) {
  # Undo the make.names() periods so the IDs read as the user wrote them
  offenders <- unmake.names(paste(offenders, collapse=", "))
  stop("'ID' column of sample annotation must have unique values, values ",
       offenders, " are repeated")
}

if (inputType == "fastq") {
  # Check that IDs in hairpin annotation are unique
  offenders <- repeatedIDs(anno$ID)
  if (length(offenders) > 0) {
    stop("'ID' column of hairpin annotation must have unique values, values ",
         paste(offenders, collapse=", "), " are repeated")
  }
} else if (inputType == "counts") {
  # Check that every annotated sample has a column in the count table
  unmatched <- samples$ID[is.na(match(samples$ID, colnames(counts)))]
  if (length(unmatched) > 0) {
    stop("sample annotation contains IDs with no matching column in the ",
         "count table: ", unmake.names(paste(unmatched, collapse=", ")))
  }

  # Check that IDs in count table are unique
  offenders <- repeatedIDs(counts$ID)
  if (length(offenders) > 0) {
    stop("'ID' column of count table must have unique values, values ",
         paste(offenders, collapse=", "), " are repeated")
  }
}

# Gene-wise tests need a 'Gene' column in the hairpin annotation
if (workMode == "glm" && roastOpt == "yes" &&
    is.na(match("Gene", colnames(anno)))) {
  stop("Gene-wise tests selected but 'Gene' column not ",
       "specified in hairpin annotation file")
}
|
|
|
250
|
|
0
|
251 ################################################################################
|
|
|
252
|
|
|
# Process arguments: gene-wise (roast) testing is only an option for GLMs
if (workMode == "glm") {
  wantRoast <- roastOpt == "yes"
}

# Split up contrasts separated by comma into a vector, strip the spaces
# around operators and replace the remaining spaces with periods
if (exists("contrastData")) {
  contrastData <- unlist(strsplit(contrastData, split=","))
  contrastData <- sanitiseEquation(contrastData)
  contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)
  if (length(contrastData) == 0) {
    stop("no contrasts were specified for the GLM analysis")
  }
}

# Make the pair of group names syntactically valid (e.g. spaces -> periods)
if (exists("pairData")) {
  pairData <- make.names(pairData)
}

# Generate output folder and paths
dir.create(folderPath)

# Define <name>Png / <name>Pdf path variables for the fixed plots
for (plotName in c("barHairpin", "barIndex", "mds", "bcv")) {
  imgOut(plotName)
}

# Paths of the per-comparison outputs
if (workMode == "classic") {
  pairLabel <- paste0(pairData[2], "-", pairData[1])
  smearPng <- makeOut(paste0("smear(", pairLabel, ").png"))
  smearPdf <- makeOut(paste0("smear(", pairLabel, ").pdf"))
  topOut <- makeOut(paste0("toptag(", pairLabel, ").tsv"))
} else if (workMode == "glm") {
  # One path per contrast; paste0()/makeOut() are vectorised so no index
  # loop is needed
  smearPng <- makeOut(paste0("smear(", contrastData, ").png"))
  smearPdf <- makeOut(paste0("smear(", contrastData, ").pdf"))
  topOut <- makeOut(paste0("toptag(", contrastData, ").tsv"))
  roastOut <- makeOut(paste0("roast(", contrastData, ").tsv"))
  barcodePng <- makeOut(paste0("barcode(", contrastData, ").png"))
  barcodePdf <- makeOut(paste0("barcode(", contrastData, ").pdf"))
}

# Initialise data for html links and images, table with the link label and
# link address
linkData <- data.frame(Label=character(), Link=character(),
                       stringsAsFactors=FALSE)
imageData <- data.frame(Label=character(), Link=character(),
                        stringsAsFactors=FALSE)
|
|
|
309 ################################################################################
|
|
|
310 ### Data Processing
|
|
|
311 ################################################################################
|
|
|
312
|
|
|
# Expand a rank selection such as "1:3, 5, 8:9" into the numeric vector
# c(1, 2, 3, 5, 8, 9). Spaces are ignored, "a:b" entries are expanded into
# the full run of integers and duplicates are removed.
expandRankSelection <- function(vals) {
  entries <- unlist(strsplit(gsub(" ", "", vals, fixed=TRUE), ",",
                             fixed=TRUE))
  expanded <- lapply(entries, function(entry) {
    if (grepl(":", entry, fixed=TRUE)) {
      bounds <- as.numeric(unlist(strsplit(entry, ":", fixed=TRUE)))
      bounds[1]:bounds[2]
    } else {
      as.numeric(entry)
    }
  })
  # Each entry is expanded on its own; the previous in-place loop removed
  # elements from the vector it was iterating over, so later ranges were
  # skipped and ended up as NA.
  as.numeric(unique(unlist(expanded)))
}

# Transform the barcode-plot selection from a string into either rank
# indices (selectOpt "rank") or a vector of gene names (any other option)
if (workMode == "glm") {
  if (selectOpt == "rank") {
    selectVals <- expandRankSelection(selectVals)
  } else {
    # Gene names are comma separated; spaces are dropped before splitting
    selectVals <- gsub(" ", "", selectVals, fixed=TRUE)
    selectVals <- unlist(strsplit(selectVals, ",", fixed=TRUE))
  }
}
|
|
|
334
|
|
|
if (inputType == "fastq") {
  # Use the edgeR hairpin read processor and capture its printed summary.
  # The barcode positions given on the command line are passed through;
  # previously they were parsed but never used, so the function's default
  # barcode positions applied whatever the user specified.
  hpReadout <- capture.output(
    data <- processHairpinReads(fastqPath, samplePath, annoPath,
                                barcodeStart=barStart, barcodeEnd=barEnd,
                                hairpinStart=hpStart, hairpinEnd=hpEnd,
                                verbose=TRUE)
  )

  # Remove function output entries that show processing data or are empty
  hpReadout <- hpReadout[hpReadout != ""]
  hpReadout <- hpReadout[!grepl("Processing", hpReadout)]
  hpReadout <- hpReadout[!grepl("in file", hpReadout)]
  hpReadout <- gsub(" -- ", "", hpReadout, fixed=TRUE)
} else if (inputType == "counts") {
  # Process counts information, set ID column to be row names.
  # drop=FALSE keeps a data frame even if only one sample column remains.
  rownames(counts) <- counts$ID
  counts <- counts[, colnames(counts) != "ID", drop=FALSE]
  countsRows <- nrow(counts)

  # Process group information: look up the group of each count column by
  # its sample ID, so the groups line up with the columns even when the
  # sample annotation is ordered differently from the count table
  sampleIdx <- match(colnames(counts), samples$ID)
  if (any(is.na(sampleIdx))) {
    stop("count table contains columns with no entry in the sample ",
         "annotation: ",
         paste(colnames(counts)[is.na(sampleIdx)], collapse=", "))
  }
  factors <- samples$group[sampleIdx]

  # Align the hairpin annotation with the rows of the count table
  annoRows <- nrow(anno)
  anno <- anno[match(rownames(counts), anno$ID), ]
  annoMatched <- sum(!is.na(anno$ID))

  if (any(is.na(anno$ID))) {
    warningStr <- paste("count table contained more hairpins than",
                        "specified in hairpin annotation file")
    warning(warningStr)
  }

  # Filter out rows with zero counts
  sel <- rowSums(counts) != 0
  counts <- counts[sel, , drop=FALSE]
  anno <- anno[sel, ]

  # Create DGEList
  data <- DGEList(counts=counts, lib.size=colSums(counts),
                  norm.factors=rep(1, ncol(counts)), genes=anno,
                  group=factors)
}

# Make the names of groups syntactically valid (replace spaces with periods)
data$samples$group <- make.names(data$samples$group)
|
|
|
381
|
|
|
# Keep only hairpins whose CPM exceeds cpmReq in at least sampleReq samples,
# recording how many hairpins the filter removed
preFilterCount <- nrow(data)
keep <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq
data <- data[keep, ]
postFilterCount <- nrow(data)
filteredCount <- preFilterCount - postFilterCount

# Estimate dispersions; the common BCV is the square root of the common
# dispersion
data <- estimateDisp(data)
commonBCV <- sqrt(data$common.dispersion)
|
|
|
392
|
|
|
393 ################################################################################
|
|
|
394 ### Output Processing
|
|
|
395 ################################################################################
|
|
|
396
|
|
|
# Each plot is written twice: as a 600x600 PNG embedded in the HTML report
# and as a PDF linked from it. The drawing code lives in small functions so
# both devices are guaranteed to get the same plot. Totals are computed up
# front instead of being assigned inside the barplot() argument list.

# Plot total counts per sample (index)
indexTotals <- colSums(data$counts)
drawIndexBars <- function() {
  barplot(indexTotals, las=2, main="Counts per index",
          cex.names=1.0, cex.axis=0.8, ylim=c(0, max(indexTotals)*1.2))
}

png(barIndexPng, width=600, height=600)
drawIndexBars()
imageData[1, ] <- c("Counts per Index", "barIndex.png")
invisible(dev.off())

pdf(barIndexPdf)
drawIndexBars()
linkData[1, ] <- c("Counts per Index Barplot (.pdf)", "barIndex.pdf")
invisible(dev.off())

# Plot per hairpin totals across all samples; bar labels are suppressed
# once there are 50 or more hairpins
hairpinTotals <- rowSums(data$counts)
drawHairpinBars <- function() {
  if (length(hairpinTotals) < 50) {
    barplot(hairpinTotals, las=2, main="Counts per hairpin",
            cex.names=0.8, cex.axis=0.8, ylim=c(0, max(hairpinTotals)*1.2))
  } else {
    barplot(hairpinTotals, las=2, main="Counts per hairpin",
            cex.names=0.8, cex.axis=0.8, ylim=c(0, max(hairpinTotals)*1.2),
            names.arg=FALSE)
  }
}

png(barHairpinPng, width=600, height=600)
drawHairpinBars()
imageData <- rbind(imageData, c("Counts per Hairpin", "barHairpin.png"))
invisible(dev.off())

pdf(barHairpinPdf)
drawHairpinBars()
newEntry <- c("Counts per Hairpin Barplot (.pdf)", "barHairpin.pdf")
linkData <- rbind(linkData, newEntry)
invisible(dev.off())

# Make an MDS plot to visualise relationships between replicate samples.
# Group labels are character strings at this point, so they are converted
# through factor() to get one integer colour per group; as.numeric() on the
# strings themselves yields NA colours.
groupLabels <- data$samples$group
groupCols <- as.numeric(factor(groupLabels))
drawMDS <- function() {
  plotMDS(data, labels=groupLabels, col=groupCols, main="MDS Plot")
}

png(mdsPng, width=600, height=600)
drawMDS()
imageData <- rbind(imageData, c("MDS Plot", "mds.png"))
invisible(dev.off())

pdf(mdsPdf)
drawMDS()
newEntry <- c("MDS Plot (.pdf)", "mds.pdf")
linkData <- rbind(linkData, newEntry)
invisible(dev.off())
|
|
|
449
|
|
|
if (workMode == "classic") {
  # Assess differential representation using classic exact testing
  # methodology in edgeR
  testData <- exactTest(data, pair=pairData)

  # Full ranked table, plus the first-column IDs of hairpins passing both
  # the FDR and log-fold-change thresholds (highlighted on the smear plot)
  top <- topTags(testData, n=Inf)
  topIDs <- top$table[(top$table$FDR < fdrThresh) &
                      (abs(top$table$logFC) > lfcThresh), 1]
  write.table(top, file=topOut, row.names=FALSE, sep="\t")

  pairLabel <- paste0(pairData[2], "-", pairData[1])
  linkName <- paste0("Top Tags Table(", pairLabel, ") (.tsv)")
  linkAddr <- paste0("toptag(", pairLabel, ").tsv")
  linkData <- rbind(linkData, c(linkName, linkAddr))

  # Smear plot (logFC versus logCPM), drawn identically on both devices;
  # periods in the group names are shown as spaces in the title
  plotTitle <- gsub(".", " ", paste0("Smear Plot: ", pairLabel), fixed=TRUE)
  drawSmear <- function() {
    plotSmear(testData, de.tags=topIDs, pch=20, cex=1.0, main=plotTitle)
    abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"),
           lty=2)
  }

  png(smearPng, width=600, height=600)
  drawSmear()
  imgName <- paste0("Smear Plot(", pairLabel, ")")
  imgAddr <- paste0("smear(", pairLabel, ").png")
  imageData <- rbind(imageData, c(imgName, imgAddr))
  invisible(dev.off())

  pdf(smearPdf)
  drawSmear()
  imgName <- paste0("Smear Plot(", pairLabel, ") (.pdf)")
  imgAddr <- paste0("smear(", pairLabel, ").pdf")
  linkData <- rbind(linkData, c(imgName, imgAddr))
  invisible(dev.off())
} else if (workMode == "glm") {
  # Generating design information: one coefficient per group, no intercept,
  # with design columns named after the groups
  factors <- factor(data$samples$group)
  design <- model.matrix(~0+factors)
  colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE)

  # Split up contrasts separated by comma into a vector (a no-op when the
  # contrasts have already been split)
  contrastData <- unlist(strsplit(contrastData, split=","))

  # Fit negative binomial GLM. The fit does not depend on the contrast, so
  # it is computed once rather than once per contrast.
  fit <- glmFit(data, design)

  # Barcode plot sizes depend on the installed limma version
  oldLimma <- packageVersion("limma") < "3.19.19"

  if (wantRoast) {
    # Row indices of the hairpins targeting each gene, keeping only genes
    # with at least hairpinReq hairpins. Also contrast-independent.
    genes <- as.character(data$genes$Gene)
    unq <- unique(genes)
    unq <- unq[!is.na(unq)]
    geneList <- list()
    for (gene in unq) {
      hairpinIdx <- which(genes == gene)
      if (length(hairpinIdx) >= hairpinReq) {
        geneList[[gene]] <- hairpinIdx
      }
    }
  }

  for (i in seq_along(contrastData)) {
    # Generate contrasts information
    contrasts <- makeContrasts(contrasts=contrastData[i], levels=design)

    # Carry out Likelihood ratio test
    testData <- glmLRT(fit, contrast=contrasts)

    # Full ranked table, plus IDs of hairpins passing both the FDR and
    # log-fold-change thresholds (highlighted on the smear plot)
    top <- topTags(testData, n=Inf)
    topIDs <- top$table[(top$table$FDR < fdrThresh) &
                        (abs(top$table$logFC) > lfcThresh), 1]
    write.table(top, file=topOut[i], row.names=FALSE, sep="\t")

    linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)")
    linkAddr <- paste0("toptag(", contrastData[i], ").tsv")
    linkData <- rbind(linkData, c(linkName, linkAddr))

    # Contrast with periods shown as spaces, for plot titles. fixed=TRUE is
    # essential: as a regular expression "." matches every character.
    contrastTitle <- gsub(".", " ", contrastData[i], fixed=TRUE)

    # Make a plot of logFC versus logCPM
    plotTitle <- paste("Smear Plot:", contrastTitle)
    drawSmear <- function() {
      plotSmear(testData, de.tags=topIDs, pch=20, cex=0.8, main=plotTitle)
      abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"),
             lty=2)
    }

    png(smearPng[i], height=600, width=600)
    drawSmear()
    imgName <- paste0("Smear Plot(", contrastData[i], ")")
    imgAddr <- paste0("smear(", contrastData[i], ").png")
    imageData <- rbind(imageData, c(imgName, imgAddr))
    invisible(dev.off())

    pdf(smearPdf[i])
    drawSmear()
    linkName <- paste0("Smear Plot(", contrastData[i], ") (.pdf)")
    linkAddr <- paste0("smear(", contrastData[i], ").pdf")
    linkData <- rbind(linkData, c(linkName, linkAddr))
    invisible(dev.off())

    if (wantRoast) {
      # Gene-level rotation test; the seed is reset for every contrast so
      # results are reproducible
      nrot <- 9999
      set.seed(602214129)
      roastData <- mroast(data, index=geneList, design=design,
                          contrast=contrasts, nrot=nrot)
      roastData <- cbind(GeneID=rownames(roastData), roastData)
      write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t")
      linkName <- paste0("Gene Level Analysis Table(", contrastData[i],
                         ") (.tsv)")
      linkAddr <- paste0("roast(", contrastData[i], ").tsv")
      linkData <- rbind(linkData, c(linkName, linkAddr))

      # Genes to draw: picked by row position in the roast table, or given
      # by name
      if (selectOpt == "rank") {
        selectedGenes <- rownames(roastData)[selectVals]
      } else {
        selectedGenes <- selectVals
      }

      # One barcode plot per selected gene, marking its hairpins among the
      # logFCs of this contrast
      drawBarcodes <- function() {
        for (gene in selectedGenes) {
          barcodeplot(testData$table$logFC, index=geneList[[gene]],
                      main=paste("Barcode Plot for", gene, "(logFCs)",
                                 contrastTitle),
                      labels=c("Positive logFC", "Negative logFC"))
        }
      }

      # PNG: all selected genes stacked vertically in a single image
      png(barcodePng[i], width=600,
          height=length(selectedGenes) * (if (oldLimma) 150 else 300))
      par(mfrow=c(length(selectedGenes), 1))
      drawBarcodes()
      imgName <- paste0("Barcode Plot(", contrastData[i], ")")
      imgAddr <- paste0("barcode(", contrastData[i], ").png")
      imageData <- rbind(imageData, c(imgName, imgAddr))
      invisible(dev.off())

      # PDF: one gene per page
      pdf(barcodePdf[i], width=8, height=(if (oldLimma) 2 else 4))
      drawBarcodes()
      linkName <- paste0("Barcode Plot(", contrastData[i], ") (.pdf)")
      linkAddr <- paste0("barcode(", contrastData[i], ").pdf")
      linkData <- rbind(linkData, c(linkName, linkAddr))
      invisible(dev.off())
    }
  }
}
|
|
|
601
|
|
1
|
# Record the finishing time, then work out the elapsed time and keep its
# printed form without the leading "Time difference of " text
timeEnd <- as.character(Sys.time())
elapsed <- round(difftime(timeEnd, timeStart), digits=3)
timeTaken <- gsub("Time difference of ", "", capture.output(elapsed),
                  fixed=TRUE)
|
|
0
|
606 ################################################################################
|
|
|
607 ### HTML Generation
|
|
|
608 ################################################################################
|
|
|
# Clear file: every later write goes through cata(), which appends
cat("", file=htmlPath)

cata("<html>\n")
HtmlHead("EdgeR Output")

cata("<body>\n")
cata("<h3>EdgeR Analysis Output:</h3>\n")

# ---- Input summary ----
cata("<h4>Input Summary:</h4>\n")
if (inputType == "fastq") {
  # Selected lines of the captured read-processing summary, picked by
  # position
  cata("<ul>\n")
  ListItem(hpReadout[1])
  ListItem(hpReadout[2])
  cata("</ul>\n")
  cata(hpReadout[3], "<br/>\n")
  cata("<ul>\n")
  ListItem(hpReadout[4])
  ListItem(hpReadout[7])
  cata("</ul>\n")
  cata(hpReadout[8:11], sep="<br/>\n")
  cata("<br />\n")
  cata("<b>Please check that read percentages are consistent with ")
  cata("expectations.</b><br />\n")
} else if (inputType == "counts") {
  cata("<ul>\n")
  ListItem("Number of Samples: ", ncol(data$counts))
  ListItem("Number of Hairpins: ", countsRows)
  ListItem("Number of annotations provided: ", annoRows)
  ListItem("Number of annotations matched to hairpin: ", annoMatched)
  cata("</ul>\n")
}

cata("The estimated common biological coefficient of variation (BCV) is: ",
     commonBCV, "<br />\n")

# ---- Embedded images ----
cata("<h4>Output:</h4>\n")
cata("All images displayed have PDF copy at the bottom of the page, these can ")
cata("exported in a pdf viewer to high resolution image format. <br/>\n")
for (i in seq_len(nrow(imageData))) {
  if (grepl("barcode", imageData$Link[i])) {
    # Barcode images stack one panel per selected gene, so their height is
    # scaled to match the size they were drawn at
    if (packageVersion("limma") < "3.19.19") {
      HtmlImage(imageData$Link[i], imageData$Label[i],
                height=length(selectedGenes)*150)
    } else {
      HtmlImage(imageData$Link[i], imageData$Label[i],
                height=length(selectedGenes)*300)
    }
  } else {
    HtmlImage(imageData$Link[i], imageData$Label[i])
  }
}
cata("<br/>\n")

# ---- Links: plots first, then tables ----
# ".tsv" is matched literally (fixed=TRUE), not as a regular expression
isTable <- grepl(".tsv", linkData$Link, fixed=TRUE)

cata("<h4>Plots:</h4>\n")
for (i in which(!isTable)) {
  HtmlLink(linkData$Link[i], linkData$Label[i])
}

cata("<h4>Tables:</h4>\n")
for (i in which(isTable)) {
  HtmlLink(linkData$Link[i], linkData$Label[i])
}

cata("<p>alt-click any of the links to download the file, or click the name ")
cata("of this task in the galaxy history panel and click on the floppy ")
cata("disk icon to download all files in a zip archive.</p>\n")
cata("<p>.tsv files are tab seperated files that can be viewed using Excel ")
cata("or other spreadsheet programs</p>\n")

# ---- Additional information ----
# The items below are list items, so they are wrapped in <ul> ... </ul>
# (previously a <table> was opened here and never closed)
cata("<h4>Additional Information:</h4>\n")
cata("<ul>\n")

if (inputType == "fastq") {
  ListItem("Data was gathered from fastq raw read file(s).")
} else if (inputType == "counts") {
  ListItem("Data was gathered from a table of counts.")
}

if (cpmReq != 0 && sampleReq != 0) {
  tempStr <- paste("Hairpins that do not have more than", cpmReq,
                   "CPM in at least", sampleReq, "samples are considered",
                   "insignificant and filtered out.")
  ListItem(tempStr)
  filterProp <- round(filteredCount/preFilterCount*100, digits=2)
  tempStr <- paste0(filteredCount, " of ", preFilterCount, " (", filterProp,
                    "%) hairpins were filtered out for low count-per-million.")
  ListItem(tempStr)
}

if (workMode == "classic") {
  ListItem("An exact test was performed on each hairpin.")
} else if (workMode == "glm") {
  ListItem("A generalised linear model was fitted to each hairpin.")
}

cata("</ul>\n")

# ---- Citations ----
cit <- character()
# User's guide links; built here but not written to the report
link <- character()
link[1] <- paste0("<a href=\"",
                  "http://www.bioconductor.org/packages/release/bioc/",
                  "vignettes/limma/inst/doc/usersguide.pdf",
                  "\">", "limma User's Guide", "</a>.")
link[2] <- paste0("<a href=\"",
                  "http://www.bioconductor.org/packages/release/bioc/",
                  "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf",
                  "\">", "edgeR User's Guide", "</a>")

cit[1] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010).",
                "edgeR: a Bioconductor package for differential",
                "expression analysis of digital gene expression",
                "data. Bioinformatics 26, 139-140")
cit[2] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests",
                "for assessing differences in tag abundance. Bioinformatics",
                "23, 2881-2887")
cit[3] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of",
                "negative binomial dispersion, with applications to SAGE data.",
                "Biostatistics, 9, 321-332")

cit[4] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential",
                "expression analysis of multifactor RNA-Seq experiments with",
                "respect to biological variation. Nucleic Acids Research 40,",
                "4288-4297")

cata("<h4>Citations</h4>")
cata("<ol>\n")
ListItem(cit[1])
ListItem(cit[2])
ListItem(cit[3])
ListItem(cit[4])
cata("</ol>\n")

# ---- Timing table ----
cata("<table border=\"0\">\n")
cata("<tr>\n")
TableItem("Task started at:"); TableItem(timeStart)
cata("</tr>\n")
cata("<tr>\n")
TableItem("Task ended at:"); TableItem(timeEnd)
cata("</tr>\n")
cata("<tr>\n")
TableItem("Task run time:"); TableItem(timeTaken)
cata("</tr>\n")
cata("</table>\n")

cata("</body>\n")
cata("</html>")
|