Mercurial > repos > iuc > edger
comparison edger.R @ 14:c5fa04118f83 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/edger commit 4b17306763bc8eb92c8967c7b4b57abcc514e670
author | iuc |
---|---|
date | Wed, 04 Sep 2024 15:49:53 +0000 |
parents | 838b481dc6f9 |
children |
comparison
equal
deleted
inserted
replaced
13:838b481dc6f9 | 14:c5fa04118f83 |
---|---|
6 # outPath", "o", 1, "character" -Path to folder to write all output to | 6 # outPath", "o", 1, "character" -Path to folder to write all output to |
7 # filesPath", "j", 2, "character" -JSON list object if multiple files input | 7 # filesPath", "j", 2, "character" -JSON list object if multiple files input |
8 # matrixPath", "m", 2, "character" -Path to count matrix | 8 # matrixPath", "m", 2, "character" -Path to count matrix |
9 # factFile", "f", 2, "character" -Path to factor information file | 9 # factFile", "f", 2, "character" -Path to factor information file |
10 # factInput", "i", 2, "character" -String containing factors if manually input | 10 # factInput", "i", 2, "character" -String containing factors if manually input |
11 # formula", "F", 2, "character". -String containing a formula to override default use of factInput | |
11 # annoPath", "a", 2, "character" -Path to input containing gene annotations | 12 # annoPath", "a", 2, "character" -Path to input containing gene annotations |
12 # contrastData", "C", 1, "character" -String containing contrasts of interest | 13 # contrastData", "C", 1, "character" -String containing contrasts of interest |
13 # cpmReq", "c", 2, "double" -Float specifying cpm requirement | 14 # cpmReq", "c", 2, "double" -Float specifying cpm requirement |
14 # cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement | 15 # cntReq", "z", 2, "integer" -Integer specifying minimum total count requirement |
15 # sampleReq", "s", 2, "integer" -Integer specifying cpm requirement | 16 # sampleReq", "s", 2, "integer" -Integer specifying cpm requirement |
39 # Record starting time | 40 # Record starting time |
40 time_start <- as.character(Sys.time()) | 41 time_start <- as.character(Sys.time()) |
41 | 42 |
42 # setup R error handling to go to stderr | 43 # setup R error handling to go to stderr |
43 options(show.error.messages = FALSE, error = function() { | 44 options(show.error.messages = FALSE, error = function() { |
44 cat(geterrmessage(), file = stderr()) | 45 cat(geterrmessage(), file = stderr()) |
45 q("no", 1, FALSE) | 46 q("no", 1, FALSE) |
46 }) | 47 }) |
47 | 48 |
48 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 49 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
49 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 50 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
50 | 51 |
61 ### Function Delcaration | 62 ### Function Delcaration |
62 ################################################################################ | 63 ################################################################################ |
63 # Function to sanitise contrast equations so there are no whitespaces | 64 # Function to sanitise contrast equations so there are no whitespaces |
64 # surrounding the arithmetic operators, leading or trailing whitespace | 65 # surrounding the arithmetic operators, leading or trailing whitespace |
65 sanitise_equation <- function(equation) { | 66 sanitise_equation <- function(equation) { |
66 equation <- gsub(" *[+] *", "+", equation) | 67 equation <- gsub(" *[+] *", "+", equation) |
67 equation <- gsub(" *[-] *", "-", equation) | 68 equation <- gsub(" *[-] *", "-", equation) |
68 equation <- gsub(" *[/] *", "/", equation) | 69 equation <- gsub(" *[/] *", "/", equation) |
69 equation <- gsub(" *[*] *", "*", equation) | 70 equation <- gsub(" *[*] *", "*", equation) |
70 equation <- gsub("^\\s+|\\s+$", "", equation) | 71 equation <- gsub("^\\s+|\\s+$", "", equation) |
71 return(equation) | 72 return(equation) |
72 } | 73 } |
73 | 74 |
74 # Function to sanitise group information | 75 # Function to sanitise group information |
75 sanitise_groups <- function(string) { | 76 sanitise_groups <- function(string) { |
76 string <- gsub(" *[,] *", ",", string) | 77 string <- gsub(" *[,] *", ",", string) |
77 string <- gsub("^\\s+|\\s+$", "", string) | 78 string <- gsub("^\\s+|\\s+$", "", string) |
78 return(string) | 79 return(string) |
79 } | 80 } |
80 | 81 |
81 # Function to change periods to whitespace in a string | 82 # Function to change periods to whitespace in a string |
82 unmake_names <- function(string) { | 83 unmake_names <- function(string) { |
83 string <- gsub(".", " ", string, fixed = TRUE) | 84 string <- gsub(".", " ", string, fixed = TRUE) |
84 return(string) | 85 return(string) |
86 } | |
87 | |
88 # Sanitise file base names coming from factors or contrasts | |
89 sanitise_basename <- function(string) { | |
90 string <- gsub("[/^]", "_", string) | |
91 return(string) | |
85 } | 92 } |
86 | 93 |
87 # Generate output folder and paths | 94 # Generate output folder and paths |
88 make_out <- function(filename) { | 95 make_out <- function(filename) { |
89 return(paste0(out_path, "/", filename)) | 96 return(paste0(out_path, "/", filename)) |
90 } | 97 } |
91 | 98 |
92 # Generating design information | 99 # Generating design information |
93 paste_listname <- function(string) { | 100 paste_listname <- function(string) { |
94 return(paste0("factors$", string)) | 101 return(paste0("factors$", string)) |
95 } | 102 } |
96 | 103 |
97 # Create cata function: default path set, default seperator empty and appending | 104 # Create cata function: default path set, default seperator empty and appending |
98 # true by default (Ripped straight from the cat function with altered argument | 105 # true by default (Ripped straight from the cat function with altered argument |
99 # defaults) | 106 # defaults) |
100 cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL, | 107 cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL, |
101 append = TRUE) { | 108 append = TRUE) { |
102 if (is.character(file)) { | 109 if (is.character(file)) { |
103 if (file == "") { | 110 if (file == "") { |
104 file <- stdout() | 111 file <- stdout() |
105 } else if (substring(file, 1L, 1L) == "|") { | 112 } else if (substring(file, 1L, 1L) == "|") { |
106 file <- pipe(substring(file, 2L), "w") | 113 file <- pipe(substring(file, 2L), "w") |
107 on.exit(close(file)) | 114 on.exit(close(file)) |
108 } else { | 115 } else { |
109 file <- file(file, ifelse(append, "a", "w")) | 116 file <- file(file, ifelse(append, "a", "w")) |
110 on.exit(close(file)) | 117 on.exit(close(file)) |
111 } | 118 } |
112 } | 119 } |
113 .Internal(cat(list(...), file, sep, fill, labels, append)) | 120 .Internal(cat(list(...), file, sep, fill, labels, append)) |
114 } | 121 } |
115 | 122 |
116 # Function to write code for html head and title | 123 # Function to write code for html head and title |
117 html_head <- function(title) { | 124 html_head <- function(title) { |
118 cata("<head>\n") | 125 cata("<head>\n") |
119 cata("<title>", title, "</title>\n") | 126 cata("<title>", title, "</title>\n") |
120 cata("</head>\n") | 127 cata("</head>\n") |
121 } | 128 } |
122 | 129 |
123 # Function to write code for html links | 130 # Function to write code for html links |
124 html_link <- function(address, label = address) { | 131 html_link <- function(address, label = address) { |
125 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") | 132 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") |
126 } | 133 } |
127 | 134 |
128 # Function to write code for html images | 135 # Function to write code for html images |
129 html_image <- function(source, label = source, height = 600, width = 600) { | 136 html_image <- function(source, label = source, height = 600, width = 600) { |
130 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) | 137 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) |
131 cata("\" width=\"", width, "\"/>\n") | 138 cata("\" width=\"", width, "\"/>\n") |
132 } | 139 } |
133 | 140 |
134 # Function to write code for html list items | 141 # Function to write code for html list items |
135 list_item <- function(...) { | 142 list_item <- function(...) { |
136 cata("<li>", ..., "</li>\n") | 143 cata("<li>", ..., "</li>\n") |
137 } | 144 } |
138 | 145 |
139 table_item <- function(...) { | 146 table_item <- function(...) { |
140 cata("<td>", ..., "</td>\n") | 147 cata("<td>", ..., "</td>\n") |
141 } | 148 } |
142 | 149 |
143 table_head_item <- function(...) { | 150 table_head_item <- function(...) { |
144 cata("<th>", ..., "</th>\n") | 151 cata("<th>", ..., "</th>\n") |
145 } | 152 } |
146 | 153 |
147 ################################################################################ | 154 ################################################################################ |
148 ### Input Processing | 155 ### Input Processing |
149 ################################################################################ | 156 ################################################################################ |
151 # Collect arguments from command line | 158 # Collect arguments from command line |
152 args <- commandArgs(trailingOnly = TRUE) | 159 args <- commandArgs(trailingOnly = TRUE) |
153 | 160 |
154 # Get options, using the spec as defined by the enclosed list. | 161 # Get options, using the spec as defined by the enclosed list. |
155 # Read the options from the default: commandArgs(TRUE). | 162 # Read the options from the default: commandArgs(TRUE). |
156 spec <- matrix(c( | 163 spec <- matrix( |
157 "htmlPath", "R", 1, "character", | 164 c( |
158 "outPath", "o", 1, "character", | 165 "htmlPath", "R", 1, "character", |
159 "filesPath", "j", 2, "character", | 166 "outPath", "o", 1, "character", |
160 "matrixPath", "m", 2, "character", | 167 "filesPath", "j", 2, "character", |
161 "factFile", "f", 2, "character", | 168 "matrixPath", "m", 2, "character", |
162 "factInput", "i", 2, "character", | 169 "factFile", "f", 2, "character", |
163 "annoPath", "a", 2, "character", | 170 "formula", "F", 2, "character", |
164 "contrastData", "C", 1, "character", | 171 "factInput", "i", 2, "character", |
165 "cpmReq", "c", 1, "double", | 172 "annoPath", "a", 2, "character", |
166 "totReq", "y", 0, "logical", | 173 "contrastData", "C", 1, "character", |
167 "cntReq", "z", 1, "integer", | 174 "cpmReq", "c", 1, "double", |
168 "sampleReq", "s", 1, "integer", | 175 "totReq", "y", 0, "logical", |
169 "normCounts", "x", 0, "logical", | 176 "cntReq", "z", 1, "integer", |
170 "rdaOpt", "r", 0, "logical", | 177 "sampleReq", "s", 1, "integer", |
171 "lfcReq", "l", 1, "double", | 178 "normCounts", "x", 0, "logical", |
172 "pValReq", "p", 1, "double", | 179 "rdaOpt", "r", 0, "logical", |
173 "pAdjOpt", "d", 1, "character", | 180 "lfcReq", "l", 1, "double", |
174 "normOpt", "n", 1, "character", | 181 "pValReq", "p", 1, "double", |
175 "robOpt", "b", 0, "logical", | 182 "pAdjOpt", "d", 1, "character", |
176 "lrtOpt", "t", 0, "logical" | 183 "normOpt", "n", 1, "character", |
177 ), | 184 "robOpt", "b", 0, "logical", |
178 byrow = TRUE, ncol = 4 | 185 "lrtOpt", "t", 0, "logical" |
186 ), | |
187 byrow = TRUE, ncol = 4 | |
179 ) | 188 ) |
180 opt <- getopt(spec) | 189 opt <- getopt(spec) |
181 | 190 |
182 | 191 |
183 if (is.null(opt$matrixPath) && is.null(opt$filesPath)) { | 192 if (is.null(opt$matrixPath) && is.null(opt$filesPath)) { |
184 cat("A counts matrix (or a set of counts files) is required.\n") | 193 cat("A counts matrix (or a set of counts files) is required.\n") |
185 q(status = 1) | 194 q(status = 1) |
186 } | 195 } |
187 | 196 |
188 if (is.null(opt$cpmReq)) { | 197 if (is.null(opt$cpmReq)) { |
189 filt_cpm <- FALSE | 198 filt_cpm <- FALSE |
190 } else { | 199 } else { |
191 filt_cpm <- TRUE | 200 filt_cpm <- TRUE |
192 } | 201 } |
193 | 202 |
194 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) { | 203 if (is.null(opt$cntReq) || is.null(opt$sampleReq)) { |
195 filt_smpcount <- FALSE | 204 filt_smpcount <- FALSE |
196 } else { | 205 } else { |
197 filt_smpcount <- TRUE | 206 filt_smpcount <- TRUE |
198 } | 207 } |
199 | 208 |
200 if (is.null(opt$totReq)) { | 209 if (is.null(opt$totReq)) { |
201 filt_totcount <- FALSE | 210 filt_totcount <- FALSE |
202 } else { | 211 } else { |
203 filt_totcount <- TRUE | 212 filt_totcount <- TRUE |
204 } | 213 } |
205 | 214 |
206 if (is.null(opt$lrtOpt)) { | 215 if (is.null(opt$lrtOpt)) { |
207 want_lrt <- FALSE | 216 want_lrt <- FALSE |
208 } else { | 217 } else { |
209 want_lrt <- TRUE | 218 want_lrt <- TRUE |
210 } | 219 } |
211 | 220 |
212 if (is.null(opt$rdaOpt)) { | 221 if (is.null(opt$rdaOpt)) { |
213 want_rda <- FALSE | 222 want_rda <- FALSE |
214 } else { | 223 } else { |
215 want_rda <- TRUE | 224 want_rda <- TRUE |
216 } | 225 } |
217 | 226 |
218 if (is.null(opt$annoPath)) { | 227 if (is.null(opt$annoPath)) { |
219 have_anno <- FALSE | 228 have_anno <- FALSE |
220 } else { | 229 } else { |
221 have_anno <- TRUE | 230 have_anno <- TRUE |
222 } | 231 } |
223 | 232 |
224 if (is.null(opt$normCounts)) { | 233 if (is.null(opt$normCounts)) { |
225 want_norm <- FALSE | 234 want_norm <- FALSE |
226 } else { | 235 } else { |
227 want_norm <- TRUE | 236 want_norm <- TRUE |
228 } | 237 } |
229 | 238 |
230 if (is.null(opt$robOpt)) { | 239 if (is.null(opt$robOpt)) { |
231 want_robust <- FALSE | 240 want_robust <- FALSE |
232 } else { | 241 } else { |
233 want_robust <- TRUE | 242 want_robust <- TRUE |
234 } | 243 } |
235 | 244 |
236 | 245 |
237 if (!is.null(opt$filesPath)) { | 246 if (!is.null(opt$filesPath)) { |
238 # Process the separate count files (adapted from DESeq2 wrapper) | 247 # Process the separate count files (adapted from DESeq2 wrapper) |
239 library("rjson") | 248 library("rjson") |
240 parser <- newJSONParser() | 249 parser <- newJSONParser() |
241 parser$addData(opt$filesPath) | 250 parser$addData(opt$filesPath) |
242 factor_list <- parser$getObject() | 251 factor_list <- parser$getObject() |
243 factors <- sapply(factor_list, function(x) x[[1]]) | 252 factors <- sapply(factor_list, function(x) x[[1]]) |
244 filenames_in <- unname(unlist(factor_list[[1]][[2]])) | 253 filenames_in <- unname(unlist(factor_list[[1]][[2]])) |
245 sampletable <- data.frame( | 254 sampletable <- data.frame( |
246 sample = basename(filenames_in), | 255 sample = basename(filenames_in), |
247 filename = filenames_in, | 256 filename = filenames_in, |
248 row.names = filenames_in, | 257 row.names = filenames_in, |
249 stringsAsFactors = FALSE | 258 stringsAsFactors = FALSE |
250 ) | 259 ) |
251 for (factor in factor_list) { | 260 for (factor in factor_list) { |
252 factorname <- factor[[1]] | 261 factorname <- factor[[1]] |
253 sampletable[[factorname]] <- character(nrow(sampletable)) | 262 sampletable[[factorname]] <- character(nrow(sampletable)) |
254 lvls <- sapply(factor[[2]], function(x) names(x)) | 263 lvls <- sapply(factor[[2]], function(x) names(x)) |
255 for (i in seq_along(factor[[2]])) { | 264 for (i in seq_along(factor[[2]])) { |
256 files <- factor[[2]][[i]][[1]] | 265 files <- factor[[2]][[i]][[1]] |
257 sampletable[files, factorname] <- lvls[i] | 266 sampletable[files, factorname] <- lvls[i] |
258 } | 267 } |
259 sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls) | 268 sampletable[[factorname]] <- factor(sampletable[[factorname]], levels = lvls) |
260 } | 269 } |
261 rownames(sampletable) <- sampletable$sample | 270 rownames(sampletable) <- sampletable$sample |
262 rem <- c("sample", "filename") | 271 rem <- c("sample", "filename") |
263 factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE] | 272 factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE] |
264 | 273 |
265 # read in count files and create single table | 274 # read in count files and create single table |
266 countfiles <- lapply(sampletable$filename, function(x) { | 275 countfiles <- lapply(sampletable$filename, function(x) { |
267 read.delim(x, row.names = 1) | 276 read.delim(x, row.names = 1) |
268 }) | 277 }) |
269 counts <- do.call("cbind", countfiles) | 278 counts <- do.call("cbind", countfiles) |
270 } else { | 279 } else { |
271 # Process the single count matrix | 280 # Process the single count matrix |
272 counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE) | 281 counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE) |
273 row.names(counts) <- counts[, 1] | 282 row.names(counts) <- counts[, 1] |
274 counts <- counts[, -1] | 283 counts <- counts[, -1] |
275 countsrows <- nrow(counts) | 284 countsrows <- nrow(counts) |
276 | 285 |
277 # Process factors | 286 # Process factors |
278 if (is.null(opt$factInput)) { | 287 if (is.null(opt$factInput)) { |
279 factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE) | 288 factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE) |
280 # check samples names match | 289 # check samples names match |
281 if (!any(factordata[, 1] %in% colnames(counts))) { | 290 if (!any(factordata[, 1] %in% colnames(counts))) { |
282 stop("Sample IDs in factors file and count matrix don't match") | 291 stop("Sample IDs in factors file and count matrix don't match") |
283 } | 292 } |
284 # order samples as in counts matrix | 293 # order samples as in counts matrix |
285 factordata <- factordata[match(colnames(counts), factordata[, 1]), ] | 294 factordata <- factordata[match(colnames(counts), factordata[, 1]), ] |
286 factors <- data.frame(sapply(factordata[, -1, drop = FALSE], make.names)) | 295 factors <- data.frame(sapply(factordata[, -1, drop = FALSE], make.names)) |
287 } else { | 296 } else { |
288 factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE)) | 297 factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE)) |
289 factordata <- list() | 298 factordata <- list() |
290 for (fact in factors) { | 299 for (fact in factors) { |
291 newfact <- unlist(strsplit(fact, split = "::")) | 300 newfact <- unlist(strsplit(fact, split = "::")) |
292 factordata <- rbind(factordata, newfact) | 301 factordata <- rbind(factordata, newfact) |
293 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. | 302 } # Factors have the form: FACT_NAME::LEVEL,LEVEL,LEVEL,LEVEL,... The first factor is the Primary Factor. |
294 | 303 |
295 # Set the row names to be the name of the factor and delete first row | 304 # Set the row names to be the name of the factor and delete first row |
296 row.names(factordata) <- factordata[, 1] | 305 row.names(factordata) <- factordata[, 1] |
297 factordata <- factordata[, -1] | 306 factordata <- factordata[, -1] |
298 factordata <- sapply(factordata, sanitise_groups) | 307 factordata <- sapply(factordata, sanitise_groups) |
299 factordata <- sapply(factordata, strsplit, split = ",") | 308 factordata <- sapply(factordata, strsplit, split = ",") |
300 factordata <- sapply(factordata, make.names) | 309 factordata <- sapply(factordata, make.names) |
301 # Transform factor data into data frame of R factor objects | 310 # Transform factor data into data frame of R factor objects |
302 factors <- data.frame(factordata) | 311 factors <- data.frame(factordata) |
303 } | 312 } |
304 } | 313 } |
305 | 314 |
306 # if annotation file provided | 315 # if annotation file provided |
307 if (have_anno) { | 316 if (have_anno) { |
308 geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE) | 317 geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE) |
309 } | 318 } |
310 | 319 |
311 # Create output directory | 320 # Create output directory |
312 out_path <- opt$outPath | 321 out_path <- opt$outPath |
313 dir.create(out_path, showWarnings = FALSE) | 322 dir.create(out_path, showWarnings = FALSE) |
314 | 323 |
315 # Split up contrasts separated by comma into a vector then sanitise | 324 # Check if contrastData is a file or not |
316 contrast_data <- unlist(strsplit(opt$contrastData, split = ",")) | 325 if (file.exists(opt$contrastData)) { |
326 contrast_data <- unlist(read.table(opt$contrastData, sep = "\t", header = TRUE)[[1]]) | |
327 } else { | |
328 # Split up contrasts separated by comma into a vector then sanitise | |
329 contrast_data <- unlist(strsplit(opt$contrastData, split = ",")) | |
330 } | |
317 contrast_data <- sanitise_equation(contrast_data) | 331 contrast_data <- sanitise_equation(contrast_data) |
318 contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE) | 332 contrast_data <- gsub(" ", ".", contrast_data, fixed = TRUE) |
319 | 333 |
320 bcv_pdf <- make_out("bcvplot.pdf") | 334 bcv_pdf <- make_out("bcvplot.pdf") |
321 bcv_png <- make_out("bcvplot.png") | 335 bcv_png <- make_out("bcvplot.png") |
322 ql_pdf <- make_out("qlplot.pdf") | 336 ql_pdf <- make_out("qlplot.pdf") |
323 ql_png <- make_out("qlplot.png") | 337 ql_png <- make_out("qlplot.png") |
324 mds_pdf <- character() # Initialise character vector | 338 mds_pdf <- character() # Initialise character vector |
325 mds_png <- character() | 339 mds_png <- character() |
326 for (i in seq_len(ncol(factors))) { | 340 for (i in seq_len(ncol(factors))) { |
327 mds_pdf[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".pdf")) | 341 mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")) |
328 mds_png[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".png")) | 342 mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")) |
329 } | 343 } |
330 md_pdf <- character() | 344 md_pdf <- character() |
331 md_png <- character() | 345 md_png <- character() |
332 top_out <- character() | 346 top_out <- character() |
333 for (i in seq_along(contrast_data)) { | 347 for (i in seq_along(contrast_data)) { |
334 md_pdf[i] <- make_out(paste0("mdplot_", contrast_data[i], ".pdf")) | 348 md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")) |
335 md_png[i] <- make_out(paste0("mdplot_", contrast_data[i], ".png")) | 349 md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")) |
336 top_out[i] <- make_out(paste0("edgeR_", contrast_data[i], ".tsv")) | 350 top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")) |
337 } # Save output paths for each contrast as vectors | 351 } # Save output paths for each contrast as vectors |
338 norm_out <- make_out("edgeR_normcounts.tsv") | 352 norm_out <- make_out("edgeR_normcounts.tsv") |
339 rda_out <- make_out("edgeR_analysis.RData") | 353 rda_out <- make_out("edgeR_analysis.RData") |
340 session_out <- make_out("session_info.txt") | 354 session_out <- make_out("session_info.txt") |
341 | 355 |
355 | 369 |
356 # Extract counts and annotation data | 370 # Extract counts and annotation data |
357 data <- list() | 371 data <- list() |
358 data$counts <- counts | 372 data$counts <- counts |
359 if (have_anno) { | 373 if (have_anno) { |
360 # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno) | 374 # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno) |
361 annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ] | 375 annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ] |
362 data$genes <- annoord | 376 data$genes <- annoord |
363 } else { | 377 } else { |
364 data$genes <- data.frame(GeneID = row.names(counts)) | 378 data$genes <- data.frame(GeneID = row.names(counts)) |
365 } | 379 } |
366 | 380 |
367 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of | 381 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of |
368 # samples. Default is no filtering | 382 # samples. Default is no filtering |
369 prefilter_count <- nrow(data$counts) | 383 prefilter_count <- nrow(data$counts) |
370 | 384 |
371 if (filt_cpm || filt_smpcount || filt_totcount) { | 385 if (filt_cpm || filt_smpcount || filt_totcount) { |
372 if (filt_totcount) { | 386 if (filt_totcount) { |
373 keep <- rowSums(data$counts) >= opt$cntReq | 387 keep <- rowSums(data$counts) >= opt$cntReq |
374 } else if (filt_smpcount) { | 388 } else if (filt_smpcount) { |
375 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq | 389 keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq |
376 } else if (filt_cpm) { | 390 } else if (filt_cpm) { |
377 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq | 391 keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq |
378 } | 392 } |
379 | 393 |
380 data$counts <- data$counts[keep, ] | 394 data$counts <- data$counts[keep, ] |
381 data$genes <- data$genes[keep, , drop = FALSE] | 395 data$genes <- data$genes[keep, , drop = FALSE] |
382 } | 396 } |
383 | 397 |
384 postfilter_count <- nrow(data$counts) | 398 postfilter_count <- nrow(data$counts) |
385 filtered_count <- prefilter_count - postfilter_count | 399 filtered_count <- prefilter_count - postfilter_count |
386 | 400 |
395 colnames(data) <- samplenames | 409 colnames(data) <- samplenames |
396 data$samples <- factors | 410 data$samples <- factors |
397 data$genes <- genes | 411 data$genes <- genes |
398 | 412 |
399 | 413 |
400 | 414 if (!is.null(opt$formula)) { |
401 formula <- "~0" | 415 formula <- opt$formula |
402 for (i in seq_along(factor_list)) { | 416 # sanitisation can be getting rid of the "~" |
403 formula <- paste(formula, factor_list[i], sep = "+") | 417 if (!startsWith(formula, "~")) { |
418 formula <- paste0("~", formula) | |
419 } | |
420 } else { | |
421 formula <- "~0" | |
422 for (i in seq_along(factor_list)) { | |
423 formula <- paste(formula, factor_list[i], sep = "+") | |
424 } | |
404 } | 425 } |
405 | 426 |
406 formula <- formula(formula) | 427 formula <- formula(formula) |
407 design <- model.matrix(formula, factors) | 428 design <- model.matrix(formula, factors) |
408 | 429 |
409 for (i in seq_along(factor_list)) { | 430 for (i in seq_along(factor_list)) { |
410 colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE) | 431 colnames(design) <- gsub(factor_list[i], "", colnames(design), fixed = TRUE) |
411 } | 432 } |
412 | 433 |
413 # Calculating normalising factor, estimating dispersion | 434 # Calculating normalising factor, estimating dispersion |
414 data <- calcNormFactors(data, method = opt$normOpt) | 435 data <- calcNormFactors(data, method = opt$normOpt) |
415 | 436 |
416 if (want_robust) { | 437 if (want_robust) { |
417 data <- estimateDisp(data, design = design, robust = TRUE) | 438 data <- estimateDisp(data, design = design, robust = TRUE) |
418 } else { | 439 } else { |
419 data <- estimateDisp(data, design = design) | 440 data <- estimateDisp(data, design = design) |
420 } | 441 } |
421 | 442 |
422 # Generate contrasts information | 443 # Generate contrasts information |
423 contrasts <- makeContrasts(contrasts = contrast_data, levels = design) | 444 contrasts <- makeContrasts(contrasts = contrast_data, levels = design) |
424 | 445 |
430 labels <- names(counts) | 451 labels <- names(counts) |
431 | 452 |
432 # MDS plot | 453 # MDS plot |
433 png(mds_png, width = 600, height = 600) | 454 png(mds_png, width = 600, height = 600) |
434 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1])) | 455 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1])) |
435 img_name <- paste0("MDS Plot_", names(factors)[1], ".png") | 456 img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".png") |
436 img_addr <- paste0("mdsplot_", names(factors)[1], ".png") | 457 img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".png") |
437 image_data[1, ] <- c(img_name, img_addr) | 458 image_data[1, ] <- c(img_name, img_addr) |
438 invisible(dev.off()) | 459 invisible(dev.off()) |
439 | 460 |
440 pdf(mds_pdf) | 461 pdf(mds_pdf) |
441 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1])) | 462 plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1])) |
442 link_name <- paste0("MDS Plot_", names(factors)[1], ".pdf") | 463 link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".pdf") |
443 link_addr <- paste0("mdsplot_", names(factors)[1], ".pdf") | 464 link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".pdf") |
444 link_data[1, ] <- c(link_name, link_addr) | 465 link_data[1, ] <- c(link_name, link_addr) |
445 invisible(dev.off()) | 466 invisible(dev.off()) |
446 | 467 |
447 # If additional factors create additional MDS plots coloured by factor | 468 # If additional factors create additional MDS plots coloured by factor |
448 if (ncol(factors) > 1) { | 469 if (ncol(factors) > 1) { |
449 for (i in 2:ncol(factors)) { | 470 for (i in 2:ncol(factors)) { |
450 png(mds_png[i], width = 600, height = 600) | 471 png(mds_png[i], width = 600, height = 600) |
451 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i])) | 472 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i])) |
452 img_name <- paste0("MDS Plot_", names(factors)[i], ".png") | 473 img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png") |
453 img_addr <- paste0("mdsplot_", names(factors)[i], ".png") | 474 img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png") |
454 image_data <- rbind(image_data, c(img_name, img_addr)) | 475 image_data <- rbind(image_data, c(img_name, img_addr)) |
455 invisible(dev.off()) | 476 invisible(dev.off()) |
456 | 477 |
457 pdf(mds_pdf[i]) | 478 pdf(mds_pdf[i]) |
458 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i])) | 479 plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i])) |
459 link_name <- paste0("MDS Plot_", names(factors)[i], ".pdf") | 480 link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf") |
460 link_addr <- paste0("mdsplot_", names(factors)[i], ".pdf") | 481 link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf") |
461 link_data <- rbind(link_data, c(link_name, link_addr)) | 482 link_data <- rbind(link_data, c(link_name, link_addr)) |
462 invisible(dev.off()) | 483 invisible(dev.off()) |
463 } | 484 } |
464 } | 485 } |
465 | 486 |
466 # BCV Plot | 487 # BCV Plot |
467 png(bcv_png, width = 600, height = 600) | 488 png(bcv_png, width = 600, height = 600) |
468 plotBCV(data, main = "BCV Plot") | 489 plotBCV(data, main = "BCV Plot") |
478 link_data <- rbind(link_data, c(link_name, link_addr)) | 499 link_data <- rbind(link_data, c(link_name, link_addr)) |
479 invisible(dev.off()) | 500 invisible(dev.off()) |
480 | 501 |
481 # Generate fit | 502 # Generate fit |
482 if (want_lrt) { | 503 if (want_lrt) { |
483 fit <- glmFit(data, design) | 504 fit <- glmFit(data, design) |
484 } else { | 505 } else { |
485 if (want_robust) { | 506 if (want_robust) { |
486 fit <- glmQLFit(data, design, robust = TRUE) | 507 fit <- glmQLFit(data, design, robust = TRUE) |
487 } else { | 508 } else { |
488 fit <- glmQLFit(data, design) | 509 fit <- glmQLFit(data, design) |
489 } | 510 } |
490 | 511 |
491 # Plot QL dispersions | 512 # Plot QL dispersions |
492 png(ql_png, width = 600, height = 600) | 513 png(ql_png, width = 600, height = 600) |
493 plotQLDisp(fit, main = "QL Plot") | 514 plotQLDisp(fit, main = "QL Plot") |
494 img_name <- "QL Plot" | 515 img_name <- "QL Plot" |
495 img_addr <- "qlplot.png" | 516 img_addr <- "qlplot.png" |
496 image_data <- rbind(image_data, c(img_name, img_addr)) | 517 image_data <- rbind(image_data, c(img_name, img_addr)) |
497 invisible(dev.off()) | 518 invisible(dev.off()) |
498 | 519 |
499 pdf(ql_pdf) | 520 pdf(ql_pdf) |
500 plotQLDisp(fit, main = "QL Plot") | 521 plotQLDisp(fit, main = "QL Plot") |
501 link_name <- "QL Plot.pdf" | 522 link_name <- "QL Plot.pdf" |
502 link_addr <- "qlplot.pdf" | 523 link_addr <- "qlplot.pdf" |
503 link_data <- rbind(link_data, c(link_name, link_addr)) | 524 link_data <- rbind(link_data, c(link_name, link_addr)) |
504 invisible(dev.off()) | 525 invisible(dev.off()) |
505 } | 526 } |
506 | 527 |
507 # Save normalised counts (log2cpm) | 528 # Save normalised counts (log2cpm) |
508 if (want_norm) { | 529 if (want_norm) { |
509 normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE) | 530 normalised_counts <- cpm(data, normalized.lib.sizes = TRUE, log = TRUE) |
510 normalised_counts <- data.frame(data$genes, normalised_counts) | 531 normalised_counts <- data.frame(data$genes, normalised_counts) |
511 write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE) | 532 write.table(normalised_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE) |
512 link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv")) | 533 link_data <- rbind(link_data, c("edgeR_normcounts.tsv", "edgeR_normcounts.tsv")) |
513 } | 534 } |
514 | 535 |
515 | 536 |
516 for (i in seq_along(contrast_data)) { | 537 for (i in seq_along(contrast_data)) { |
517 if (want_lrt) { | 538 if (want_lrt) { |
518 res <- glmLRT(fit, contrast = contrasts[, i]) | 539 res <- glmLRT(fit, contrast = contrasts[, i]) |
519 } else { | 540 } else { |
520 res <- glmQLFTest(fit, contrast = contrasts[, i]) | 541 res <- glmQLFTest(fit, contrast = contrasts[, i]) |
521 } | 542 } |
522 | 543 |
523 status <- decideTestsDGE(res, | 544 status <- decideTestsDGE(res, |
524 adjust.method = opt$pAdjOpt, p.value = opt$pValReq, | 545 adjust.method = opt$pAdjOpt, p.value = opt$pValReq, |
525 lfc = opt$lfcReq | 546 lfc = opt$lfcReq |
526 ) | 547 ) |
527 sum_status <- summary(status) | 548 sum_status <- summary(status) |
528 | 549 |
529 # Collect counts for differential expression | 550 # Collect counts for differential expression |
530 up_count[i] <- sum_status["Up", ] | 551 up_count[i] <- sum_status["Up", ] |
531 down_count[i] <- sum_status["Down", ] | 552 down_count[i] <- sum_status["Down", ] |
532 flat_count[i] <- sum_status["NotSig", ] | 553 flat_count[i] <- sum_status["NotSig", ] |
533 | 554 |
534 # Write top expressions table | 555 # Write top expressions table |
535 top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue") | 556 top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue") |
536 write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE) | 557 write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE) |
537 | 558 |
538 link_name <- paste0("edgeR_", contrast_data[i], ".tsv") | 559 link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv") |
539 link_addr <- paste0("edgeR_", contrast_data[i], ".tsv") | 560 link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv") |
540 link_data <- rbind(link_data, c(link_name, link_addr)) | 561 link_data <- rbind(link_data, c(link_name, link_addr)) |
541 | 562 |
542 # Plot MD (log ratios vs mean difference) using limma package | 563 # Plot MD (log ratios vs mean difference) using limma package |
543 pdf(md_pdf[i]) | 564 pdf(md_pdf[i]) |
544 limma::plotMD(res, | 565 limma::plotMD(res, |
545 status = status, | 566 status = status, |
546 main = paste("MD Plot:", unmake_names(contrast_data[i])), | 567 main = paste("MD Plot:", unmake_names(contrast_data[i])), |
547 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1), | 568 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1), |
548 xlab = "Average Expression", ylab = "logFC" | 569 xlab = "Average Expression", ylab = "logFC" |
549 ) | 570 ) |
550 | 571 |
551 abline(h = 0, col = "grey", lty = 2) | 572 abline(h = 0, col = "grey", lty = 2) |
552 | 573 |
553 link_name <- paste0("MD Plot_", contrast_data[i], ".pdf") | 574 link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf") |
554 link_addr <- paste0("mdplot_", contrast_data[i], ".pdf") | 575 link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf") |
555 link_data <- rbind(link_data, c(link_name, link_addr)) | 576 link_data <- rbind(link_data, c(link_name, link_addr)) |
556 invisible(dev.off()) | 577 invisible(dev.off()) |
557 | 578 |
558 png(md_png[i], height = 600, width = 600) | 579 png(md_png[i], height = 600, width = 600) |
559 limma::plotMD(res, | 580 limma::plotMD(res, |
560 status = status, | 581 status = status, |
561 main = paste("MD Plot:", unmake_names(contrast_data[i])), | 582 main = paste("MD Plot:", unmake_names(contrast_data[i])), |
562 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1), | 583 hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1), |
563 xlab = "Average Expression", ylab = "logFC" | 584 xlab = "Average Expression", ylab = "logFC" |
564 ) | 585 ) |
565 | 586 |
566 abline(h = 0, col = "grey", lty = 2) | 587 abline(h = 0, col = "grey", lty = 2) |
567 | 588 |
568 img_name <- paste0("MD Plot_", contrast_data[i], ".png") | 589 img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png") |
569 img_addr <- paste0("mdplot_", contrast_data[i], ".png") | 590 img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png") |
570 image_data <- rbind(image_data, c(img_name, img_addr)) | 591 image_data <- rbind(image_data, c(img_name, img_addr)) |
571 invisible(dev.off()) | 592 invisible(dev.off()) |
572 } | 593 } |
573 sig_diff <- data.frame(Up = up_count, Flat = flat_count, Down = down_count) | 594 sig_diff <- data.frame(Up = up_count, Flat = flat_count, Down = down_count) |
574 row.names(sig_diff) <- contrast_data | 595 row.names(sig_diff) <- contrast_data |
575 | 596 |
576 # Save relevant items as rda object | 597 # Save relevant items as rda object |
577 if (want_rda) { | 598 if (want_rda) { |
578 if (want_norm) { | 599 if (want_norm) { |
579 save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design, | 600 save(counts, data, status, normalised_counts, labels, factors, fit, res, top, contrasts, design, |
580 file = rda_out, ascii = TRUE | 601 file = rda_out, ascii = TRUE |
581 ) | 602 ) |
582 } else { | 603 } else { |
583 save(counts, data, status, labels, factors, fit, res, top, contrasts, design, | 604 save(counts, data, status, labels, factors, fit, res, top, contrasts, design, |
584 file = rda_out, ascii = TRUE | 605 file = rda_out, ascii = TRUE |
585 ) | 606 ) |
586 } | 607 } |
587 link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData")) | 608 link_data <- rbind(link_data, c("edgeR_analysis.RData", "edgeR_analysis.RData")) |
588 } | 609 } |
589 | 610 |
590 # Record session info | 611 # Record session info |
591 writeLines(capture.output(sessionInfo()), session_out) | 612 writeLines(capture.output(sessionInfo()), session_out) |
592 link_data <- rbind(link_data, c("Session Info", "session_info.txt")) | 613 link_data <- rbind(link_data, c("Session Info", "session_info.txt")) |
610 cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n") | 631 cata("Links to PDF copies of plots are in 'Plots' section below.<br />\n") |
611 | 632 |
612 html_image(image_data$Link[1], image_data$Label[1]) | 633 html_image(image_data$Link[1], image_data$Label[1]) |
613 | 634 |
614 for (i in 2:nrow(image_data)) { | 635 for (i in 2:nrow(image_data)) { |
615 html_image(image_data$Link[i], image_data$Label[i]) | 636 html_image(image_data$Link[i], image_data$Label[i]) |
616 } | 637 } |
617 | 638 |
618 cata("<h4>Differential Expression Counts:</h4>\n") | 639 cata("<h4>Differential Expression Counts:</h4>\n") |
619 | 640 |
620 cata("<table border=\"1\" cellpadding=\"4\">\n") | 641 cata("<table border=\"1\" cellpadding=\"4\">\n") |
621 cata("<tr>\n") | 642 cata("<tr>\n") |
622 table_item() | 643 table_item() |
623 for (i in colnames(sig_diff)) { | 644 for (i in colnames(sig_diff)) { |
624 table_head_item(i) | 645 table_head_item(i) |
625 } | 646 } |
626 cata("</tr>\n") | 647 cata("</tr>\n") |
627 for (i in seq_len(nrow(sig_diff))) { | 648 for (i in seq_len(nrow(sig_diff))) { |
628 cata("<tr>\n") | 649 cata("<tr>\n") |
629 table_head_item(unmake_names(row.names(sig_diff)[i])) | 650 table_head_item(unmake_names(row.names(sig_diff)[i])) |
630 for (j in seq_len(ncol(sig_diff))) { | 651 for (j in seq_len(ncol(sig_diff))) { |
631 table_item(as.character(sig_diff[i, j])) | 652 table_item(as.character(sig_diff[i, j])) |
632 } | 653 } |
633 cata("</tr>\n") | 654 cata("</tr>\n") |
634 } | 655 } |
635 cata("</table>") | 656 cata("</table>") |
636 | 657 |
637 cata("<h4>Plots:</h4>\n") | 658 cata("<h4>Plots:</h4>\n") |
638 for (i in seq_len(nrow(link_data))) { | 659 for (i in seq_len(nrow(link_data))) { |
639 if (grepl(".pdf", link_data$Link[i])) { | 660 if (grepl(".pdf", link_data$Link[i])) { |
640 html_link(link_data$Link[i], link_data$Label[i]) | 661 html_link(link_data$Link[i], link_data$Label[i]) |
641 } | 662 } |
642 } | 663 } |
643 | 664 |
644 cata("<h4>Tables:</h4>\n") | 665 cata("<h4>Tables:</h4>\n") |
645 for (i in seq_len(nrow(link_data))) { | 666 for (i in seq_len(nrow(link_data))) { |
646 if (grepl(".tsv", link_data$Link[i])) { | 667 if (grepl(".tsv", link_data$Link[i])) { |
647 html_link(link_data$Link[i], link_data$Label[i]) | 668 html_link(link_data$Link[i], link_data$Label[i]) |
648 } | 669 } |
649 } | 670 } |
650 | 671 |
651 if (want_rda) { | 672 if (want_rda) { |
652 cata("<h4>R Data Objects:</h4>\n") | 673 cata("<h4>R Data Objects:</h4>\n") |
653 for (i in seq_len(nrow(link_data))) { | 674 for (i in seq_len(nrow(link_data))) { |
654 if (grepl(".RData", link_data$Link[i])) { | 675 if (grepl(".RData", link_data$Link[i])) { |
655 html_link(link_data$Link[i], link_data$Label[i]) | 676 html_link(link_data$Link[i], link_data$Label[i]) |
656 } | 677 } |
657 } | 678 } |
658 } | 679 } |
659 | 680 |
660 cata("<p>Alt-click links to download file.</p>\n") | 681 cata("<p>Alt-click links to download file.</p>\n") |
661 cata("<p>Click floppy disc icon associated history item to download ") | 682 cata("<p>Click floppy disc icon associated history item to download ") |
662 cata("all files.</p>\n") | 683 cata("all files.</p>\n") |
664 | 685 |
665 cata("<h4>Additional Information</h4>\n") | 686 cata("<h4>Additional Information</h4>\n") |
666 cata("<ul>\n") | 687 cata("<ul>\n") |
667 | 688 |
668 if (filt_cpm || filt_smpcount || filt_totcount) { | 689 if (filt_cpm || filt_smpcount || filt_totcount) { |
669 if (filt_cpm) { | 690 if (filt_cpm) { |
670 temp_str <- paste( | 691 temp_str <- paste( |
671 "Genes without more than", opt$cpmReq, | 692 "Genes without more than", opt$cpmReq, |
672 "CPM in at least", opt$sampleReq, "samples are insignificant", | 693 "CPM in at least", opt$sampleReq, "samples are insignificant", |
673 "and filtered out." | 694 "and filtered out." |
695 ) | |
696 } else if (filt_smpcount) { | |
697 temp_str <- paste( | |
698 "Genes without more than", opt$cntReq, | |
699 "counts in at least", opt$sampleReq, "samples are insignificant", | |
700 "and filtered out." | |
701 ) | |
702 } else if (filt_totcount) { | |
703 temp_str <- paste( | |
704 "Genes without more than", opt$cntReq, | |
705 "counts, after summing counts for all samples, are insignificant", | |
706 "and filtered out." | |
707 ) | |
708 } | |
709 | |
710 list_item(temp_str) | |
711 filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2) | |
712 temp_str <- paste0( | |
713 filtered_count, " of ", prefilter_count, " (", filter_prop, | |
714 "%) genes were filtered out for low expression." | |
674 ) | 715 ) |
675 } else if (filt_smpcount) { | 716 list_item(temp_str) |
676 temp_str <- paste( | |
677 "Genes without more than", opt$cntReq, | |
678 "counts in at least", opt$sampleReq, "samples are insignificant", | |
679 "and filtered out." | |
680 ) | |
681 } else if (filt_totcount) { | |
682 temp_str <- paste( | |
683 "Genes without more than", opt$cntReq, | |
684 "counts, after summing counts for all samples, are insignificant", | |
685 "and filtered out." | |
686 ) | |
687 } | |
688 | |
689 list_item(temp_str) | |
690 filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2) | |
691 temp_str <- paste0( | |
692 filtered_count, " of ", prefilter_count, " (", filter_prop, | |
693 "%) genes were filtered out for low expression." | |
694 ) | |
695 list_item(temp_str) | |
696 } | 717 } |
697 list_item(opt$normOpt, " was the method used to normalise library sizes.") | 718 list_item(opt$normOpt, " was the method used to normalise library sizes.") |
698 if (want_lrt) { | 719 if (want_lrt) { |
699 list_item("The edgeR likelihood ratio test was used.") | 720 list_item("The edgeR likelihood ratio test was used.") |
700 } else { | 721 } else { |
701 if (want_robust) { | 722 if (want_robust) { |
702 list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).") | 723 list_item("The edgeR quasi-likelihood test was used with robust settings (robust=TRUE with estimateDisp and glmQLFit).") |
703 } else { | 724 } else { |
704 list_item("The edgeR quasi-likelihood test was used.") | 725 list_item("The edgeR quasi-likelihood test was used.") |
705 } | 726 } |
706 } | 727 } |
707 if (opt$pAdjOpt != "none") { | 728 if (opt$pAdjOpt != "none") { |
708 if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") { | 729 if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") { |
730 temp_str <- paste0( | |
731 "MD-Plot highlighted genes are significant at FDR ", | |
732 "of ", opt$pValReq, " and exhibit log2-fold-change of at ", | |
733 "least ", opt$lfcReq, "." | |
734 ) | |
735 list_item(temp_str) | |
736 } else if (opt$pAdjOpt == "holm") { | |
737 temp_str <- paste0( | |
738 "MD-Plot highlighted genes are significant at adjusted ", | |
739 "p-value of ", opt$pValReq, " by the Holm(1979) ", | |
740 "method, and exhibit log2-fold-change of at least ", | |
741 opt$lfcReq, "." | |
742 ) | |
743 list_item(temp_str) | |
744 } | |
745 } else { | |
709 temp_str <- paste0( | 746 temp_str <- paste0( |
710 "MD-Plot highlighted genes are significant at FDR ", | 747 "MD-Plot highlighted genes are significant at p-value ", |
711 "of ", opt$pValReq, " and exhibit log2-fold-change of at ", | 748 "of ", opt$pValReq, " and exhibit log2-fold-change of at ", |
712 "least ", opt$lfcReq, "." | 749 "least ", opt$lfcReq, "." |
713 ) | 750 ) |
714 list_item(temp_str) | 751 list_item(temp_str) |
715 } else if (opt$pAdjOpt == "holm") { | |
716 temp_str <- paste0( | |
717 "MD-Plot highlighted genes are significant at adjusted ", | |
718 "p-value of ", opt$pValReq, " by the Holm(1979) ", | |
719 "method, and exhibit log2-fold-change of at least ", | |
720 opt$lfcReq, "." | |
721 ) | |
722 list_item(temp_str) | |
723 } | |
724 } else { | |
725 temp_str <- paste0( | |
726 "MD-Plot highlighted genes are significant at p-value ", | |
727 "of ", opt$pValReq, " and exhibit log2-fold-change of at ", | |
728 "least ", opt$lfcReq, "." | |
729 ) | |
730 list_item(temp_str) | |
731 } | 752 } |
732 cata("</ul>\n") | 753 cata("</ul>\n") |
733 | 754 |
734 cata("<h4>Summary of experimental data:</h4>\n") | 755 cata("<h4>Summary of experimental data:</h4>\n") |
735 | 756 |
739 cata("<tr>\n") | 760 cata("<tr>\n") |
740 table_head_item("SampleID") | 761 table_head_item("SampleID") |
741 table_head_item(names(factors)[1], " (Primary Factor)") | 762 table_head_item(names(factors)[1], " (Primary Factor)") |
742 | 763 |
743 if (ncol(factors) > 1) { | 764 if (ncol(factors) > 1) { |
744 for (i in names(factors)[2:length(names(factors))]) { | 765 for (i in names(factors)[2:length(names(factors))]) { |
745 table_head_item(i) | 766 table_head_item(i) |
746 } | 767 } |
747 cata("</tr>\n") | 768 cata("</tr>\n") |
748 } | 769 } |
749 | 770 |
750 for (i in seq_len(nrow((factors)))) { | 771 for (i in seq_len(nrow((factors)))) { |
751 cata("<tr>\n") | 772 cata("<tr>\n") |
752 table_head_item(row.names(factors)[i]) | 773 table_head_item(row.names(factors)[i]) |
753 for (j in seq_len(ncol(factors))) { | 774 for (j in seq_len(ncol(factors))) { |
754 table_item(as.character(unmake_names(factors[i, j]))) | 775 table_item(as.character(unmake_names(factors[i, j]))) |
755 } | 776 } |
756 cata("</tr>\n") | 777 cata("</tr>\n") |
757 } | 778 } |
758 cata("</table>") | 779 cata("</table>") |
759 | 780 |
760 for (i in seq_len(nrow(link_data))) { | 781 for (i in seq_len(nrow(link_data))) { |
761 if (grepl("session_info", link_data$Link[i])) { | 782 if (grepl("session_info", link_data$Link[i])) { |
762 html_link(link_data$Link[i], link_data$Label[i]) | 783 html_link(link_data$Link[i], link_data$Label[i]) |
763 } | 784 } |
764 } | 785 } |
765 | 786 |
766 cata("<table border=\"0\">\n") | 787 cata("<table border=\"0\">\n") |
767 cata("<tr>\n") | 788 cata("<tr>\n") |
768 table_item("Task started at:") | 789 table_item("Task started at:") |