Mercurial > repos > shians > shrnaseq
annotate hairpinTool.R @ 20:7c2e09eb72e5
- Changed xml tool name back to shrnaseq
| author | shian_su <registertonysu@gmail.com> |
|---|---|
| date | Wed, 19 Nov 2014 23:24:57 +1100 |
| parents | e7cc9c011df0 |
| children |
| rev | line source |
|---|---|
| 1 | 1 # ARGS: 1.inputType -String specifying format of input (fastq or table) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
2 # IF inputType is "fastq" or "pairedFastq: |
| 1 | 3 # 2*.fastqPath -One or more strings specifying path to fastq files |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
4 # 2.annoPath -String specifying path to hairpin annotation table |
| 1 | 5 # 3.samplePath -String specifying path to sample annotation table |
| 6 # 4.barStart -Integer specifying starting position of barcode | |
| 7 # 5.barEnd -Integer specifying ending position of barcode | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
8 # ### |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
9 # IF inputType is "pairedFastq": |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
10 # 6.barStartRev -Integer specifying starting position of barcode |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
11 # on reverse end |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
12 # 7.barEndRev -Integer specifying ending position of barcode |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
13 # on reverse end |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
14 # ### |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
15 # 8.hpStart -Integer specifying startins position of hairpin |
| 1 | 16 # unique region |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
17 # 9.hpEnd -Integer specifying ending position of hairpin |
| 1 | 18 # unique region |
| 19 # IF inputType is "counts": | |
| 20 # 2.countPath -String specifying path to count table | |
| 21 # 3.annoPath -String specifying path to hairpin annotation table | |
| 22 # 4.samplePath -String specifying path to sample annotation table | |
| 23 # ### | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
24 # 10.secFactName -String specifying name of secondary factor |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
25 # 11.cpmReq -Float specifying cpm requirement |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
26 # 12.sampleReq -Integer specifying cpm requirement |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
27 # 13.readReq -Integer specifying read requirement |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
28 # 14.fdrThresh -Float specifying the FDR requirement |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
29 # 15.lfcThresh -Float specifying the log-fold-change requirement |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
30 # 16.workMode -String specifying exact test or GLM usage |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
31 # 17.htmlPath -String specifying path to HTML file |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
32 # 18.folderPath -String specifying path to folder for output |
| 1 | 33 # IF workMode is "classic" (exact test) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
34 # 19.pairData[2] -String specifying first group for exact test |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
35 # 20.pairData[1] -String specifying second group for exact test |
| 1 | 36 # ### |
| 37 # IF workMode is "glm" | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
38 # 19.contrastData -String specifying contrasts to be made |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
39 # 20.roastOpt -String specifying usage of gene-wise tests |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
40 # 21.hairpinReq -String specifying hairpin requirement for gene- |
| 1 | 41 # wise test |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
42 # 22.selectOpt -String specifying type of selection for barcode |
| 1 | 43 # plots |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
44 # 23.selectVals -String specifying members selected for barcode |
| 1 | 45 # plots |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
46 # ### |
| 1 | 47 # |
| 48 # OUT: Bar Plot of Counts Per Index | |
| 49 # Bar Plot of Counts Per Hairpin | |
| 50 # MDS Plot | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
51 # BCV Plot |
| 1 | 52 # Smear Plot |
| 53 # Barcode Plots (If Genewise testing was selected) | |
| 54 # Top Expression Table | |
| 3 | 55 # Feature Counts Table |
| 1 | 56 # HTML file linking to the ouputs |
| 57 # | |
| 58 # Author: Shian Su - registertonysu@gmail.com - Jan 2014 | |
| 59 | |
| 0 | 60 # Record starting time |
| 61 timeStart <- as.character(Sys.time()) | |
| 62 | |
| 63 # Loading and checking required packages | |
| 64 library(methods, quietly=TRUE, warn.conflicts=FALSE) | |
| 65 library(statmod, quietly=TRUE, warn.conflicts=FALSE) | |
| 66 library(splines, quietly=TRUE, warn.conflicts=FALSE) | |
| 67 library(edgeR, quietly=TRUE, warn.conflicts=FALSE) | |
| 68 library(limma, quietly=TRUE, warn.conflicts=FALSE) | |
| 69 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
70 if (packageVersion("edgeR") < "3.7.17") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
71 stop("Please update 'edgeR' to version >= 3.7.17 to run this tool") |
| 3 | 72 } |
| 73 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
74 if (packageVersion("limma")<"3.21.16") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
75 message("Update 'limma' to version >= 3.21.16 to see updated barcode graphs") |
| 0 | 76 } |
| 77 | |
| 78 ################################################################################ | |
| 79 ### Function declarations | |
| 80 ################################################################################ | |
| 81 | |
| 3 | 82 # Function to load libaries without messages |
| 83 silentLibrary <- function(...) { | |
| 84 list <- c(...) | |
| 85 for (package in list){ | |
| 86 suppressPackageStartupMessages(library(package, character.only=TRUE)) | |
| 87 } | |
| 88 } | |
| 89 | |
| 0 | 90 # Function to sanitise contrast equations so there are no whitespaces |
| 91 # surrounding the arithmetic operators, leading or trailing whitespace | |
| 92 sanitiseEquation <- function(equation) { | |
| 93 equation <- gsub(" *[+] *", "+", equation) | |
| 94 equation <- gsub(" *[-] *", "-", equation) | |
| 95 equation <- gsub(" *[/] *", "/", equation) | |
| 96 equation <- gsub(" *[*] *", "*", equation) | |
| 97 equation <- gsub("^\\s+|\\s+$", "", equation) | |
| 98 return(equation) | |
| 99 } | |
| 100 | |
| 101 # Function to sanitise group information | |
| 102 sanitiseGroups <- function(string) { | |
| 103 string <- gsub(" *[,] *", ",", string) | |
| 104 string <- gsub("^\\s+|\\s+$", "", string) | |
| 105 return(string) | |
| 106 } | |
| 107 | |
| 108 # Function to change periods to whitespace in a string | |
| 109 unmake.names <- function(string) { | |
| 110 string <- gsub(".", " ", string, fixed=TRUE) | |
| 111 return(string) | |
| 112 } | |
| 113 | |
| 114 # Function has string input and generates an output path string | |
| 115 makeOut <- function(filename) { | |
| 116 return(paste0(folderPath, "/", filename)) | |
| 117 } | |
| 118 | |
| 119 # Function has string input and generates both a pdf and png output strings | |
| 120 imgOut <- function(filename) { | |
| 121 assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")), | |
| 3 | 122 envir=.GlobalEnv) |
| 0 | 123 assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")), |
| 3 | 124 envir=.GlobalEnv) |
| 0 | 125 } |
| 126 | |
| 127 # Create cat function default path set, default seperator empty and appending | |
| 128 # true by default (Ripped straight from the cat function with altered argument | |
| 129 # defaults) | |
| 3 | 130 cata <- function(..., file=htmlPath, sep="", fill=FALSE, labels=NULL, |
| 131 append=TRUE) { | |
| 0 | 132 if (is.character(file)) |
| 133 if (file == "") | |
| 134 file <- stdout() | |
| 135 else if (substring(file, 1L, 1L) == "|") { | |
| 136 file <- pipe(substring(file, 2L), "w") | |
| 137 on.exit(close(file)) | |
| 138 } | |
| 139 else { | |
| 140 file <- file(file, ifelse(append, "a", "w")) | |
| 141 on.exit(close(file)) | |
| 142 } | |
| 143 .Internal(cat(list(...), file, sep, fill, labels, append)) | |
| 144 } | |
| 145 | |
| 146 # Function to write code for html head and title | |
| 147 HtmlHead <- function(title) { | |
| 148 cata("<head>\n") | |
| 149 cata("<title>", title, "</title>\n") | |
| 150 cata("</head>\n") | |
| 151 } | |
| 152 | |
| 153 # Function to write code for html links | |
| 154 HtmlLink <- function(address, label=address) { | |
| 155 cata("<a href=\"", address, "\" target=\"_blank\">", label, "</a><br />\n") | |
| 156 } | |
| 157 | |
| 158 # Function to write code for html images | |
| 159 HtmlImage <- function(source, label=source, height=600, width=600) { | |
| 160 cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height) | |
| 161 cata("\" width=\"", width, "\"/>\n") | |
| 162 } | |
| 163 | |
| 164 # Function to write code for html list items | |
| 165 ListItem <- function(...) { | |
| 166 cata("<li>", ..., "</li>\n") | |
| 167 } | |
| 168 | |
| 169 TableItem <- function(...) { | |
| 170 cata("<td>", ..., "</td>\n") | |
| 171 } | |
| 172 | |
| 173 TableHeadItem <- function(...) { | |
| 174 cata("<th>", ..., "</th>\n") | |
| 175 } | |
| 176 ################################################################################ | |
| 177 ### Input Processing | |
| 178 ################################################################################ | |
| 179 | |
| 180 # Grabbing arguments from command line | |
| 181 argv <- commandArgs(TRUE) | |
| 182 | |
| 183 inputType <- as.character(argv[1]) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
184 if (inputType == "fastq") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
185 |
| 0 | 186 fastqPath <- as.character(gsub("fastq::", "", argv[grepl("fastq::", argv)], |
| 187 fixed=TRUE)) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
188 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
189 # Remove fastq paths |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
190 argv <- argv[!grepl("fastq::", argv, fixed=TRUE)] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
191 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
192 fastqPathRev <- NULL |
| 1 | 193 annoPath <- as.character(argv[2]) |
| 0 | 194 samplePath <- as.character(argv[3]) |
| 195 barStart <- as.numeric(argv[4]) | |
| 196 barEnd <- as.numeric(argv[5]) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
197 barStartRev <- NULL |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
198 barStartRev <- NULL |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
199 hpStart <- as.numeric(argv[8]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
200 hpEnd <- as.numeric(argv[9]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
201 } else if (inputType=="pairedFastq") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
202 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
203 fastqPath <- as.character(gsub("fastq::", "", argv[grepl("fastq::", argv)], |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
204 fixed=TRUE)) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
205 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
206 fastqPathRev <- as.character(gsub("fastqRev::", "", |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
207 argv[grepl("fastqRev::", argv)], fixed=TRUE)) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
208 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
209 # Remove fastq paths |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
210 argv <- argv[!grepl("fastq::", argv, fixed=TRUE)] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
211 argv <- argv[!grepl("fastqRev::", argv, fixed=TRUE)] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
212 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
213 annoPath <- as.character(argv[2]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
214 samplePath <- as.character(argv[3]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
215 barStart <- as.numeric(argv[4]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
216 barEnd <- as.numeric(argv[5]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
217 barStartRev <- as.numeric(argv[6]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
218 barEndRev <- as.numeric(argv[7]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
219 hpStart <- as.numeric(argv[8]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
220 hpEnd <- as.numeric(argv[9]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
221 } else if (inputType == "counts") { |
| 0 | 222 countPath <- as.character(argv[2]) |
| 223 annoPath <- as.character(argv[3]) | |
| 224 samplePath <- as.character(argv[4]) | |
| 225 } | |
| 3 | 226 |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
227 secFactName <- as.character(argv[10]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
228 cpmReq <- as.numeric(argv[11]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
229 sampleReq <- as.numeric(argv[12]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
230 readReq <- as.numeric(argv[13]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
231 fdrThresh <- as.numeric(argv[14]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
232 lfcThresh <- as.numeric(argv[15]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
233 selectDirection <- as.character(argv[16]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
234 workMode <- as.character(argv[17]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
235 htmlPath <- as.character(argv[18]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
236 folderPath <- as.character(argv[19]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
237 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
238 if (workMode == "classic") { |
| 0 | 239 pairData <- character() |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
240 pairData[2] <- as.character(argv[20]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
241 pairData[1] <- as.character(argv[21]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
242 } else if (workMode == "glm") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
243 contrastData <- as.character(argv[20]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
244 roastOpt <- as.character(argv[21]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
245 hairpinReq <- as.numeric(argv[22]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
246 selectOpt <- as.character(argv[23]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
247 selectVals <- as.character(argv[24]) |
| 0 | 248 } |
| 249 | |
| 250 # Read in inputs | |
| 1 | 251 |
| 252 samples <- read.table(samplePath, header=TRUE, sep="\t") | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
253 |
| 1 | 254 anno <- read.table(annoPath, header=TRUE, sep="\t") |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
255 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
256 if (inputType == "counts") { |
| 0 | 257 counts <- read.table(countPath, header=TRUE, sep="\t") |
| 258 } | |
| 1 | 259 |
| 0 | 260 ###################### Check inputs for correctness ############################ |
| 261 samples$ID <- make.names(samples$ID) | |
| 262 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
263 if ( !any(grepl("group", names(samples))) ) { |
| 0 | 264 stop("'group' column not specified in sample annotation file") |
| 265 } # Check if grouping variable has been specified | |
| 266 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
267 if (secFactName != "none") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
268 if ( !any(grepl(secFactName, names(samples))) ) { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
269 tempStr <- paste0("Second factor specified as \"", secFactName, "\" but ", |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
270 "no such column name found in sample annotation file") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
271 stop(tempStr) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
272 } # Check if specified secondary factor is present |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
273 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
274 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
275 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
276 if ( any(table(samples$ID) > 1) ){ |
| 0 | 277 tab <- table(samples$ID) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
278 offenders <- paste(names(tab[tab > 1]), collapse=", ") |
| 0 | 279 offenders <- unmake.names(offenders) |
| 1 | 280 stop("'ID' column of sample annotation must have unique values, values ", |
| 0 | 281 offenders, " are repeated") |
| 282 } # Check that IDs in sample annotation are unique | |
| 283 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
284 if (inputType == "fastq" || inputType == "pairedFastq") { |
| 1 | 285 |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
286 if ( any(table(anno$ID) > 1) ){ |
| 1 | 287 tab <- table(anno$ID) |
| 0 | 288 offenders <- paste(names(tab[tab>1]), collapse=", ") |
| 1 | 289 stop("'ID' column of hairpin annotation must have unique values, values ", |
| 0 | 290 offenders, " are repeated") |
| 291 } # Check that IDs in hairpin annotation are unique | |
| 1 | 292 |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
293 } else if (inputType == "counts") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
294 # The first element of the colnames will be 'ID' and should not match |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
295 idFromSample <- samples$ID |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
296 idFromTable <- colnames(counts)[-1] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
297 if (any(is.na(match(idFromTable, idFromSample)))) { |
| 1 | 298 stop("not all samples have groups specified") |
| 299 } # Check that a group has be specifed for each sample | |
| 0 | 300 |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
301 if ( any(table(counts$ID) > 1) ){ |
| 0 | 302 tab <- table(counts$ID) |
| 303 offenders <- paste(names(tab[tab>1]), collapse=", ") | |
| 1 | 304 stop("'ID' column of count table must have unique values, values ", |
| 0 | 305 offenders, " are repeated") |
| 306 } # Check that IDs in count table are unique | |
| 307 } | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
308 if (workMode == "glm") { |
| 1 | 309 if (roastOpt == "yes") { |
| 310 if (is.na(match("Gene", colnames(anno)))) { | |
| 311 tempStr <- paste("Gene-wise tests selected but'Gene' column not", | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
312 "specified in hairpin annotation file") |
| 1 | 313 stop(tempStr) |
| 314 } | |
| 315 } | |
| 316 } | |
| 317 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
318 if (secFactName != "none") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
319 if (workMode != "glm") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
320 tempStr <- paste("only glm analysis type possible when secondary factor", |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
321 "used, please change appropriate option.") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
322 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
323 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
324 |
| 0 | 325 ################################################################################ |
| 326 | |
| 327 # Process arguments | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
328 if (workMode == "glm") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
329 if (roastOpt == "yes") { |
| 0 | 330 wantRoast <- TRUE |
| 331 } else { | |
| 332 wantRoast <- FALSE | |
| 333 } | |
| 334 } | |
| 335 | |
| 336 # Split up contrasts seperated by comma into a vector and replace spaces with | |
| 337 # periods | |
| 338 if (exists("contrastData")) { | |
| 339 contrastData <- unlist(strsplit(contrastData, split=",")) | |
| 340 contrastData <- sanitiseEquation(contrastData) | |
| 341 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE) | |
| 342 } | |
| 343 | |
| 344 # Replace spaces with periods in pair data | |
| 345 if (exists("pairData")) { | |
| 346 pairData <- make.names(pairData) | |
| 347 } | |
| 348 | |
| 349 # Generate output folder and paths | |
| 3 | 350 dir.create(folderPath, showWarnings=FALSE) |
| 0 | 351 |
| 352 # Generate links for outputs | |
| 353 imgOut("barHairpin") | |
| 354 imgOut("barIndex") | |
| 355 imgOut("mds") | |
| 356 imgOut("bcv") | |
| 357 if (workMode == "classic") { | |
| 358 smearPng <- makeOut(paste0("smear(", pairData[2], "-", pairData[1],").png")) | |
| 359 smearPdf <- makeOut(paste0("smear(", pairData[2], "-", pairData[1],").pdf")) | |
| 360 topOut <- makeOut(paste0("toptag(", pairData[2], "-", pairData[1],").tsv")) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
361 } else if (workMode == "glm") { |
| 0 | 362 smearPng <- character() |
| 363 smearPdf <- character() | |
| 364 topOut <- character() | |
| 365 roastOut <- character() | |
| 366 barcodePng <- character() | |
| 367 barcodePdf <- character() | |
| 368 for (i in 1:length(contrastData)) { | |
| 369 smearPng[i] <- makeOut(paste0("smear(", contrastData[i], ").png")) | |
| 370 smearPdf[i] <- makeOut(paste0("smear(", contrastData[i], ").pdf")) | |
| 371 topOut[i] <- makeOut(paste0("toptag(", contrastData[i], ").tsv")) | |
| 3 | 372 roastOut[i] <- makeOut(paste0("gene_level(", contrastData[i], ").tsv")) |
| 0 | 373 barcodePng[i] <- makeOut(paste0("barcode(", contrastData[i], ").png")) |
| 374 barcodePdf[i] <- makeOut(paste0("barcode(", contrastData[i], ").pdf")) | |
| 375 } | |
| 376 } | |
| 3 | 377 countsOut <- makeOut("counts.tsv") |
| 378 sessionOut <- makeOut("session_info.txt") | |
| 379 | |
| 0 | 380 # Initialise data for html links and images, table with the link label and |
| 381 # link address | |
| 382 linkData <- data.frame(Label=character(), Link=character(), | |
| 383 stringsAsFactors=FALSE) | |
| 384 imageData <- data.frame(Label=character(), Link=character(), | |
| 385 stringsAsFactors=FALSE) | |
| 3 | 386 |
| 387 # Initialise vectors for storage of up/down/neutral regulated counts | |
| 388 upCount <- numeric() | |
| 389 downCount <- numeric() | |
| 390 flatCount <- numeric() | |
| 391 | |
| 0 | 392 ################################################################################ |
| 393 ### Data Processing | |
| 394 ################################################################################ | |
| 395 | |
| 396 # Transform gene selection from string into index values for mroast | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
397 if (workMode == "glm") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
398 if (selectOpt == "rank") { |
| 0 | 399 selectVals <- gsub(" ", "", selectVals, fixed=TRUE) |
| 400 selectVals <- unlist(strsplit(selectVals, ",")) | |
| 401 | |
| 402 for (i in 1:length(selectVals)) { | |
| 403 if (grepl(":", selectVals[i], fixed=TRUE)) { | |
| 404 temp <- unlist(strsplit(selectVals[i], ":")) | |
| 405 selectVals <- selectVals[-i] | |
| 406 a <- as.numeric(temp[1]) | |
| 407 b <- as.numeric(temp[2]) | |
| 408 selectVals <- c(selectVals, a:b) | |
| 409 } | |
| 410 } | |
| 411 selectVals <- as.numeric(unique(selectVals)) | |
| 412 } else { | |
| 413 selectVals <- gsub(" ", "", selectVals, fixed=TRUE) | |
| 3 | 414 selectVals <- unlist(strsplit(selectVals, ",")) |
| 0 | 415 } |
| 416 } | |
| 417 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
418 if (inputType == "fastq" || inputType == "pairedFastq") { |
| 1 | 419 # Use EdgeR hairpin process and capture outputs |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
420 |
| 1 | 421 hpReadout <- capture.output( |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
422 data <- processAmplicons(readfile=fastqPath, readfile2=fastqPathRev, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
423 barcodefile=samplePath, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
424 hairpinfile=annoPath, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
425 barcodeStart=barStart, barcodeEnd=barEnd, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
426 barcodeStartRev=barStartRev, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
427 barcodeEndRev=barEndRev, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
428 hairpinStart=hpStart, hairpinEnd=hpEnd, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
429 verbose=TRUE) |
| 1 | 430 ) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
431 |
| 1 | 432 # Remove function output entries that show processing data or is empty |
| 433 hpReadout <- hpReadout[hpReadout!=""] | |
| 434 hpReadout <- hpReadout[!grepl("Processing", hpReadout)] | |
| 435 hpReadout <- hpReadout[!grepl("in file", hpReadout)] | |
| 436 hpReadout <- gsub(" -- ", "", hpReadout, fixed=TRUE) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
437 |
| 1 | 438 # Make the names of groups syntactically valid (replace spaces with periods) |
| 439 data$samples$group <- make.names(data$samples$group) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
440 if (secFactName != "none") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
441 data$samples[[secFactName]] <- make.names(data$samples[[secFactName]]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
442 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
443 } else if (inputType == "counts") { |
| 0 | 444 # Process counts information, set ID column to be row names |
| 445 rownames(counts) <- counts$ID | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
446 counts <- counts[ , !(colnames(counts) == "ID")] |
| 0 | 447 countsRows <- nrow(counts) |
| 448 | |
| 449 # Process group information | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
450 sampleNames <- colnames(counts) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
451 matchedIndex <- match(sampleNames, samples$ID) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
452 factors <- samples$group[matchedIndex] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
453 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
454 if (secFactName != "none") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
455 secFactors <- samples[[secFactName]][matchedIndex] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
456 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
457 |
| 0 | 458 annoRows <- nrow(anno) |
| 459 anno <- anno[match(rownames(counts), anno$ID), ] | |
| 460 annoMatched <- sum(!is.na(anno$ID)) | |
| 461 | |
| 462 if (any(is.na(anno$ID))) { | |
| 463 warningStr <- paste("count table contained more hairpins than", | |
| 464 "specified in hairpin annotation file") | |
| 465 warning(warningStr) | |
| 466 } | |
| 467 | |
| 468 # Filter out rows with zero counts | |
| 469 sel <- rowSums(counts)!=0 | |
| 470 counts <- counts[sel, ] | |
| 471 anno <- anno[sel, ] | |
| 472 | |
| 473 # Create DGEList | |
| 474 data <- DGEList(counts=counts, lib.size=colSums(counts), | |
| 475 norm.factors=rep(1,ncol(counts)), genes=anno, group=factors) | |
| 1 | 476 |
| 0 | 477 # Make the names of groups syntactically valid (replace spaces with periods) |
| 478 data$samples$group <- make.names(data$samples$group) | |
| 479 } | |
| 480 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
481 # Filter out any samples with zero counts |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
482 if (any(data$samples$lib.size == 0)) { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
483 sampleSel <- data$samples$lib.size != 0 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
484 filteredSamples <- paste(data$samples$ID[!sampleSel], collapse=", ") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
485 data$counts <- data$counts[, sampleSel] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
486 data$samples <- data$samples[sampleSel, ] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
487 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
488 |
| 0 | 489 # Filter hairpins with low counts |
| 1 | 490 preFilterCount <- nrow(data) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
491 selRow <- rowSums(cpm(data$counts) > cpmReq) >= sampleReq |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
492 selCol <- colSums(data$counts) > readReq |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
493 data <- data[selRow, selCol] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
494 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
495 # Check if any data survived filtering |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
496 if (length(data$counts) == 0) { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
497 stop("no data remains after filtering, consider relaxing filters") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
498 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
499 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
500 # Count number of filtered tags and samples |
| 1 | 501 postFilterCount <- nrow(data) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
502 filteredCount <- preFilterCount - postFilterCount |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
503 sampleFilterCount <- sum(!selCol) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
504 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
505 if (secFactName == "none") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
506 # Estimate dispersions |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
507 data <- estimateDisp(data) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
508 commonBCV <- round(sqrt(data$common.dispersion), 4) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
509 } else { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
510 # Construct design |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
511 if (inputType == "counts") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
512 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
513 sampleNames <- colnames(counts) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
514 matchedIndex <- match(sampleNames, samples$ID) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
515 factors <- factor(make.names(samples$group[matchedIndex])) |
| 0 | 516 |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
517 secFactors <- factor(make.names(samples[[secFactName]][matchedIndex])) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
518 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
519 } else if (inputType == "fastq" || inputType == "pairedFastq") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
520 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
521 factors <- factor(data$sample$group) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
522 secFactors <- factor(data$sample[[secFactName]]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
523 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
524 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
525 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
526 design <- model.matrix(~0 + factors + secFactors) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
527 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
528 # Estimate dispersions |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
529 data <- estimateDisp(data, design=design) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
530 commonBCV <- round(sqrt(data$common.dispersion), 4) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
531 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
532 |
| 0 | 533 |
| 534 ################################################################################ | |
| 535 ### Output Processing | |
| 536 ################################################################################ | |
| 537 | |
| 538 # Plot number of hairpins that could be matched per sample | |
| 539 png(barIndexPng, width=600, height=600) | |
| 540 barplot(height<-colSums(data$counts), las=2, main="Counts per index", | |
| 541 cex.names=1.0, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
| 542 imageData[1, ] <- c("Counts per Index", "barIndex.png") | |
| 543 invisible(dev.off()) | |
| 544 | |
| 545 pdf(barIndexPdf) | |
| 546 barplot(height<-colSums(data$counts), las=2, main="Counts per index", | |
| 547 cex.names=1.0, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
| 548 linkData[1, ] <- c("Counts per Index Barplot (.pdf)", "barIndex.pdf") | |
| 549 invisible(dev.off()) | |
| 550 | |
| 551 # Plot per hairpin totals across all samples | |
| 552 png(barHairpinPng, width=600, height=600) | |
| 553 if (nrow(data$counts)<50) { | |
| 554 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
| 555 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
| 556 } else { | |
| 557 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
| 558 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2), | |
| 559 names.arg=FALSE) | |
| 560 } | |
| 561 imageData <- rbind(imageData, c("Counts per Hairpin", "barHairpin.png")) | |
| 562 invisible(dev.off()) | |
| 563 | |
| 564 pdf(barHairpinPdf) | |
| 565 if (nrow(data$counts)<50) { | |
| 566 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
| 567 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2)) | |
| 568 } else { | |
| 569 barplot(height<-rowSums(data$counts), las=2, main="Counts per hairpin", | |
| 570 cex.names=0.8, cex.axis=0.8, ylim=c(0, max(height)*1.2), | |
| 571 names.arg=FALSE) | |
| 572 } | |
| 573 newEntry <- c("Counts per Hairpin Barplot (.pdf)", "barHairpin.pdf") | |
| 574 linkData <- rbind(linkData, newEntry) | |
| 575 invisible(dev.off()) | |
| 576 | |
| 577 # Make an MDS plot to visualise relationships between replicate samples | |
| 578 png(mdsPng, width=600, height=600) | |
| 579 plotMDS(data, labels=data$samples$group, col=as.numeric(data$samples$group), | |
| 580 main="MDS Plot") | |
| 581 imageData <- rbind(imageData, c("MDS Plot", "mds.png")) | |
| 582 invisible(dev.off()) | |
| 583 | |
| 584 pdf(mdsPdf) | |
| 585 plotMDS(data, labels=data$samples$group, col=as.numeric(data$samples$group), | |
| 586 main="MDS Plot") | |
| 587 newEntry <- c("MDS Plot (.pdf)", "mds.pdf") | |
| 588 linkData <- rbind(linkData, newEntry) | |
| 589 invisible(dev.off()) | |
| 590 | |
| 3 | 591 # BCV Plot |
| 592 png(bcvPng, width=600, height=600) | |
| 593 plotBCV(data, main="BCV Plot") | |
| 594 imageData <- rbind(imageData, c("BCV Plot", "bcv.png")) | |
| 595 invisible(dev.off()) | |
| 596 | |
| 597 pdf(bcvPdf) | |
| 598 plotBCV(data, main="BCV Plot") | |
| 599 newEntry <- c("BCV Plot (.pdf)", "bcv.pdf") | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
600 linkData <- rbind(linkData, newEntry) |
| 3 | 601 invisible(dev.off()) |
| 602 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
603 if (workMode == "classic") { |
| 0 | 604 # Assess differential representation using classic exact testing methodology |
| 605 # in edgeR | |
| 606 testData <- exactTest(data, pair=pairData) | |
| 607 | |
| 608 top <- topTags(testData, n=Inf) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
609 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
610 if (selectDirection == "all") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
611 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
612 (abs(top$table$logFC) > lfcThresh), 1] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
613 } else if (selectDirection == "up") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
614 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
615 (top$table$logFC > lfcThresh), 1] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
616 } else if (selectDirection == "down") { |
| 0 | 617 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
618 (top$table$logFC < -lfcThresh), 1] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
619 } |
| 3 | 620 |
| 0 | 621 write.table(top, file=topOut, row.names=FALSE, sep="\t") |
| 3 | 622 |
| 0 | 623 linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1], |
| 624 ") (.tsv)") | |
| 625 linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv") | |
| 626 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
| 627 | |
| 3 | 628 upCount[1] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
629 |
| 3 | 630 downCount[1] <- sum(top$table$FDR < fdrThresh & |
| 631 top$table$logFC < -lfcThresh) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
632 |
| 3 | 633 flatCount[1] <- sum(top$table$FDR > fdrThresh | |
| 634 abs(top$table$logFC) < lfcThresh) | |
| 635 | |
| 636 | |
| 637 | |
| 0 | 638 # Select hairpins with FDR < 0.05 to highlight on plot |
| 639 png(smearPng, width=600, height=600) | |
| 640 plotTitle <- gsub(".", " ", | |
| 641 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), | |
| 3 | 642 fixed=TRUE) |
| 0 | 643 plotSmear(testData, de.tags=topIDs, |
| 644 pch=20, cex=1.0, main=plotTitle) | |
| 3 | 645 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) |
| 0 | 646 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")") |
| 647 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png") | |
| 648 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
| 649 invisible(dev.off()) | |
| 650 | |
| 651 pdf(smearPdf) | |
| 652 plotTitle <- gsub(".", " ", | |
| 653 paste0("Smear Plot: ", pairData[2], "-", pairData[1]), | |
| 3 | 654 fixed=TRUE) |
| 0 | 655 plotSmear(testData, de.tags=topIDs, |
| 656 pch=20, cex=1.0, main=plotTitle) | |
| 3 | 657 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) |
| 0 | 658 imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)") |
| 659 imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf") | |
| 660 linkData <- rbind(linkData, c(imgName, imgAddr)) | |
| 661 invisible(dev.off()) | |
| 3 | 662 |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
663 } else if (workMode == "glm") { |
| 0 | 664 # Generating design information |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
665 if (secFactName == "none") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
666 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
667 factors <- factor(data$sample$group) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
668 design <- model.matrix(~0 + factors) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
669 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
670 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
671 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
672 } else { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
673 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
674 factors <- factor(data$sample$group) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
675 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
676 if (inputType == "counts") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
677 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
678 sampleNames <- colnames(counts) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
679 matchedIndex <- match(sampleNames, samples$ID) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
680 factors <- factor(samples$group[matchedIndex]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
681 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
682 secFactors <- factor(samples[[secFactName]][matchedIndex]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
683 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
684 } else if (inputType == "fastq" || inputType == "pairedFastq") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
685 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
686 secFactors <- factor(data$sample[[secFactName]]) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
687 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
688 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
689 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
690 design <- model.matrix(~0 + factors + secFactors) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
691 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
692 colnames(design) <- gsub("factors", "", colnames(design), fixed=TRUE) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
693 colnames(design) <- gsub("secFactors", secFactName, colnames(design), |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
694 fixed=TRUE) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
695 } |
| 0 | 696 |
| 697 | |
| 698 # Split up contrasts seperated by comma into a vector | |
| 699 contrastData <- unlist(strsplit(contrastData, split=",")) | |
| 3 | 700 |
| 0 | 701 for (i in 1:length(contrastData)) { |
| 702 # Generate contrasts information | |
| 703 contrasts <- makeContrasts(contrasts=contrastData[i], levels=design) | |
| 704 | |
| 705 # Fit negative bionomial GLM | |
| 3 | 706 fit <- glmFit(data, design) |
| 0 | 707 # Carry out Likelihood ratio test |
| 3 | 708 testData <- glmLRT(fit, contrast=contrasts) |
| 0 | 709 |
| 710 # Select hairpins with FDR < 0.05 to highlight on plot | |
| 711 top <- topTags(testData, n=Inf) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
712 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
713 if (selectDirection == "all") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
714 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
| 0 | 715 (abs(top$table$logFC) > lfcThresh), 1] |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
716 } else if (selectDirection == "up") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
717 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
718 (top$table$logFC > lfcThresh), 1] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
719 } else if (selectDirection == "down") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
720 topIDs <- top$table[(top$table$FDR < fdrThresh) & |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
721 (top$table$logFC < -lfcThresh), 1] |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
722 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
723 |
| 0 | 724 write.table(top, file=topOut[i], row.names=FALSE, sep="\t") |
| 725 | |
| 726 linkName <- paste0("Top Tags Table(", contrastData[i], ") (.tsv)") | |
| 727 linkAddr <- paste0("toptag(", contrastData[i], ").tsv") | |
| 728 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
| 729 | |
| 3 | 730 # Collect counts for differential representation |
| 731 upCount[i] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) | |
| 732 downCount[i] <- sum(top$table$FDR < fdrThresh & | |
| 733 top$table$logFC < -lfcThresh) | |
| 734 flatCount[i] <- sum(top$table$FDR > fdrThresh | | |
| 735 abs(top$table$logFC) < lfcThresh) | |
| 736 | |
| 0 | 737 # Make a plot of logFC versus logCPM |
| 738 png(smearPng[i], height=600, width=600) | |
| 739 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], | |
| 740 fixed=TRUE)) | |
| 741 plotSmear(testData, de.tags=topIDs, pch=20, cex=0.8, main=plotTitle) | |
| 742 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) | |
| 743 | |
| 744 imgName <- paste0("Smear Plot(", contrastData[i], ")") | |
| 745 imgAddr <- paste0("smear(", contrastData[i], ").png") | |
| 746 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
| 747 invisible(dev.off()) | |
| 748 | |
| 749 pdf(smearPdf[i]) | |
| 750 plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], | |
| 751 fixed=TRUE)) | |
| 752 plotSmear(testData, de.tags=topIDs, pch=20, cex=0.8, main=plotTitle) | |
| 753 abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) | |
| 754 | |
| 755 linkName <- paste0("Smear Plot(", contrastData[i], ") (.pdf)") | |
| 756 linkAddr <- paste0("smear(", contrastData[i], ").pdf") | |
| 757 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
| 758 invisible(dev.off()) | |
| 759 | |
| 760 genes <- as.character(data$genes$Gene) | |
| 761 unq <- unique(genes) | |
| 762 unq <- unq[!is.na(unq)] | |
| 763 geneList <- list() | |
| 764 for (gene in unq) { | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
765 if (length(which(genes == gene)) >= hairpinReq) { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
766 geneList[[gene]] <- which(genes == gene) |
| 0 | 767 } |
| 768 } | |
| 769 | |
| 770 if (wantRoast) { | |
| 771 # Input preparaton for roast | |
| 3 | 772 nrot <- 9999 |
| 0 | 773 set.seed(602214129) |
| 774 roastData <- mroast(data, index=geneList, design=design, | |
| 775 contrast=contrasts, nrot=nrot) | |
| 776 roastData <- cbind(GeneID=rownames(roastData), roastData) | |
| 777 write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t") | |
| 778 linkName <- paste0("Gene Level Analysis Table(", contrastData[i], | |
| 779 ") (.tsv)") | |
| 3 | 780 linkAddr <- paste0("gene_level(", contrastData[i], ").tsv") |
| 0 | 781 linkData <- rbind(linkData, c(linkName, linkAddr)) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
782 if (selectOpt == "rank") { |
| 0 | 783 selectedGenes <- rownames(roastData)[selectVals] |
| 784 } else { | |
| 785 selectedGenes <- selectVals | |
| 786 } | |
| 787 | |
| 788 if (packageVersion("limma")<"3.19.19") { | |
| 789 png(barcodePng[i], width=600, height=length(selectedGenes)*150) | |
| 790 } else { | |
| 791 png(barcodePng[i], width=600, height=length(selectedGenes)*300) | |
| 792 } | |
| 793 par(mfrow=c(length(selectedGenes), 1)) | |
| 794 for (gene in selectedGenes) { | |
| 795 barcodeplot(testData$table$logFC, index=geneList[[gene]], | |
| 796 main=paste("Barcode Plot for", gene, "(logFCs)", | |
| 797 gsub(".", " ", contrastData[i])), | |
| 798 labels=c("Positive logFC", "Negative logFC")) | |
| 799 } | |
| 800 imgName <- paste0("Barcode Plot(", contrastData[i], ")") | |
| 801 imgAddr <- paste0("barcode(", contrastData[i], ").png") | |
| 802 imageData <- rbind(imageData, c(imgName, imgAddr)) | |
| 803 dev.off() | |
| 804 if (packageVersion("limma")<"3.19.19") { | |
| 805 pdf(barcodePdf[i], width=8, height=2) | |
| 806 } else { | |
| 807 pdf(barcodePdf[i], width=8, height=4) | |
| 808 } | |
| 809 for (gene in selectedGenes) { | |
| 810 barcodeplot(testData$table$logFC, index=geneList[[gene]], | |
| 811 main=paste("Barcode Plot for", gene, "(logFCs)", | |
| 812 gsub(".", " ", contrastData[i])), | |
| 813 labels=c("Positive logFC", "Negative logFC")) | |
| 814 } | |
| 815 linkName <- paste0("Barcode Plot(", contrastData[i], ") (.pdf)") | |
| 816 linkAddr <- paste0("barcode(", contrastData[i], ").pdf") | |
| 817 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
| 818 dev.off() | |
| 819 } | |
| 820 } | |
| 821 } | |
| 822 | |
| 3 | 823 # Generate data frame of the significant differences |
| 824 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) | |
| 825 if (workMode == "glm") { | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
826 |
| 3 | 827 row.names(sigDiff) <- contrastData |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
828 |
| 3 | 829 } else if (workMode == "classic") { |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
830 |
| 3 | 831 row.names(sigDiff) <- paste0(pairData[2], "-", pairData[1]) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
832 |
| 3 | 833 } |
| 834 | |
| 835 # Output table of summarised counts | |
| 836 ID <- rownames(data$counts) | |
| 837 outputCounts <- cbind(ID, data$counts) | |
| 838 write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t", | |
| 839 quote=FALSE) | |
| 840 linkName <- "Counts table (.tsv)" | |
| 841 linkAddr <- "counts.tsv" | |
| 842 linkData <- rbind(linkData, c(linkName, linkAddr)) | |
| 843 | |
| 844 # Record session info | |
| 845 writeLines(capture.output(sessionInfo()), sessionOut) | |
| 846 linkData <- rbind(linkData, c("Session Info", "session_info.txt")) | |
| 847 | |
| 1 | 848 # Record ending time and calculate total run time |
| 0 | 849 timeEnd <- as.character(Sys.time()) |
| 1 | 850 timeTaken <- capture.output(round(difftime(timeEnd,timeStart), digits=3)) |
| 851 timeTaken <- gsub("Time difference of ", "", timeTaken, fixed=TRUE) | |
| 0 | 852 ################################################################################ |
| 853 ### HTML Generation | |
| 854 ################################################################################ | |
| 855 # Clear file | |
| 856 cat("", file=htmlPath) | |
| 857 | |
| 858 cata("<html>\n") | |
| 859 HtmlHead("EdgeR Output") | |
| 860 | |
| 861 cata("<body>\n") | |
| 862 cata("<h3>EdgeR Analysis Output:</h3>\n") | |
| 863 cata("<h4>Input Summary:</h4>\n") | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
864 if (inputType == "fastq" || inputType == "pairedFastq") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
865 |
| 0 | 866 cata("<ul>\n") |
| 867 ListItem(hpReadout[1]) | |
| 868 ListItem(hpReadout[2]) | |
| 869 cata("</ul>\n") | |
| 3 | 870 cata(hpReadout[3], "<br />\n") |
| 0 | 871 cata("<ul>\n") |
| 872 ListItem(hpReadout[4]) | |
| 873 ListItem(hpReadout[7]) | |
| 874 cata("</ul>\n") | |
| 3 | 875 cata(hpReadout[8:11], sep="<br />\n") |
| 0 | 876 cata("<br />\n") |
| 877 cata("<b>Please check that read percentages are consistent with ") | |
| 878 cata("expectations.</b><br >\n") | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
879 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
880 } else if (inputType == "counts") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
881 |
| 0 | 882 cata("<ul>\n") |
| 883 ListItem("Number of Samples: ", ncol(data$counts)) | |
| 884 ListItem("Number of Hairpins: ", countsRows) | |
| 885 ListItem("Number of annotations provided: ", annoRows) | |
| 886 ListItem("Number of annotations matched to hairpin: ", annoMatched) | |
| 887 cata("</ul>\n") | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
888 |
| 0 | 889 } |
| 890 | |
| 891 cata("The estimated common biological coefficient of variation (BCV) is: ", | |
| 892 commonBCV, "<br />\n") | |
| 893 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
894 if (secFactName == "none") { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
895 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
896 cata("No secondary factor specified.<br />\n") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
897 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
898 } else { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
899 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
900 cata("Secondary factor specified as: ", secFactName, "<br />\n") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
901 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
902 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
903 |
| 0 | 904 cata("<h4>Output:</h4>\n") |
| 3 | 905 cata("PDF copies of JPEGS available in 'Plots' section.<br />\n") |
| 0 | 906 for (i in 1:nrow(imageData)) { |
| 907 if (grepl("barcode", imageData$Link[i])) { | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
908 |
| 0 | 909 if (packageVersion("limma")<"3.19.19") { |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
910 |
| 0 | 911 HtmlImage(imageData$Link[i], imageData$Label[i], |
| 912 height=length(selectedGenes)*150) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
913 |
| 0 | 914 } else { |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
915 |
| 0 | 916 HtmlImage(imageData$Link[i], imageData$Label[i], |
| 917 height=length(selectedGenes)*300) | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
918 |
| 0 | 919 } |
| 920 } else { | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
921 |
| 0 | 922 HtmlImage(imageData$Link[i], imageData$Label[i]) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
923 |
| 0 | 924 } |
| 925 } | |
| 3 | 926 cata("<br />\n") |
| 927 | |
| 928 cata("<h4>Differential Representation Counts:</h4>\n") | |
| 929 | |
| 930 cata("<table border=\"1\" cellpadding=\"4\">\n") | |
| 931 cata("<tr>\n") | |
| 932 TableItem() | |
| 933 for (i in colnames(sigDiff)) { | |
| 934 TableHeadItem(i) | |
| 935 } | |
| 936 cata("</tr>\n") | |
| 937 for (i in 1:nrow(sigDiff)) { | |
| 938 cata("<tr>\n") | |
| 939 TableHeadItem(unmake.names(row.names(sigDiff)[i])) | |
| 940 for (j in 1:ncol(sigDiff)) { | |
| 941 TableItem(as.character(sigDiff[i, j])) | |
| 942 } | |
| 943 cata("</tr>\n") | |
| 944 } | |
| 945 cata("</table>") | |
| 0 | 946 |
| 947 cata("<h4>Plots:</h4>\n") | |
| 948 for (i in 1:nrow(linkData)) { | |
| 3 | 949 if (grepl(".pdf", linkData$Link[i])) { |
| 0 | 950 HtmlLink(linkData$Link[i], linkData$Label[i]) |
| 951 } | |
| 952 } | |
| 953 | |
| 954 cata("<h4>Tables:</h4>\n") | |
| 955 for (i in 1:nrow(linkData)) { | |
| 956 if (grepl(".tsv", linkData$Link[i])) { | |
| 957 HtmlLink(linkData$Link[i], linkData$Label[i]) | |
| 958 } | |
| 959 } | |
| 960 | |
| 3 | 961 cata("<p>Alt-click links to download file.</p>\n") |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
962 cata("<p>Click floppy disc icon on associated history item to download ") |
| 3 | 963 cata("all files.</p>\n") |
| 964 cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") | |
| 0 | 965 |
| 1 | 966 cata("<h4>Additional Information:</h4>\n") |
| 967 | |
| 968 if (inputType == "fastq") { | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
969 |
| 1 | 970 ListItem("Data was gathered from fastq raw read file(s).") |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
971 |
| 1 | 972 } else if (inputType == "counts") { |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
973 |
| 1 | 974 ListItem("Data was gathered from a table of counts.") |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
975 |
| 1 | 976 } |
| 977 | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
978 if (cpmReq != 0 && sampleReq != 0) { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
979 tempStr <- paste("Target sequences without more than", cpmReq, |
| 3 | 980 "CPM in at least", sampleReq, "samples are insignificant", |
| 981 "and filtered out.") | |
| 1 | 982 ListItem(tempStr) |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
983 |
| 1 | 984 filterProp <- round(filteredCount/preFilterCount*100, digits=2) |
| 985 tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
986 "%) target sequences were filtered out for low ", |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
987 "count-per-million.") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
988 ListItem(tempStr) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
989 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
990 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
991 if (sampleReq != 0) { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
992 tempStr <- paste("Samples that did not produce more than", sampleReq, |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
993 "counts were filtered out.") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
994 ListItem(tempStr) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
995 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
996 tempStr <- paste0(sampleFilterCount, " samples were filtered out for low ", |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
997 "counts.") |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
998 ListItem(tempStr) |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
999 } |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
1000 |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
1001 if (exists("filteredSamples")) { |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
1002 tempStr <- paste("The following samples were filtered out for having zero", |
|
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
1003 "library size: ", filteredSamples) |
| 1 | 1004 ListItem(tempStr) |
| 1005 } | |
| 1006 | |
| 1007 if (workMode == "classic") { | |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
1008 ListItem("An exact test was performed on each target sequence.") |
| 1 | 1009 } else if (workMode == "glm") { |
|
12
e7cc9c011df0
- Changed dependency paths
shian_su <registertonysu@gmail.com>
parents:
3
diff
changeset
|
1010 ListItem("A generalised linear model was fitted to each target sequence.") |
| 1 | 1011 } |
| 1012 | |
| 1013 cit <- character() | |
| 1014 link <-character() | |
| 1015 link[1] <- paste0("<a href=\"", | |
| 1016 "http://www.bioconductor.org/packages/release/bioc/", | |
| 1017 "vignettes/limma/inst/doc/usersguide.pdf", | |
| 1018 "\">", "limma User's Guide", "</a>.") | |
| 1019 link[2] <- paste0("<a href=\"", | |
| 1020 "http://www.bioconductor.org/packages/release/bioc/", | |
| 1021 "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf", | |
| 1022 "\">", "edgeR User's Guide", "</a>") | |
| 1023 | |
| 1024 cit[1] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010).", | |
| 1025 "edgeR: a Bioconductor package for differential", | |
| 1026 "expression analysis of digital gene expression", | |
| 1027 "data. Bioinformatics 26, 139-140") | |
| 1028 cit[2] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests", | |
| 1029 "for assessing differences in tag abundance. Bioinformatics", | |
| 1030 "23, 2881-2887") | |
| 1031 cit[3] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of", | |
| 1032 "negative binomial dispersion, with applications to SAGE data.", | |
| 1033 "Biostatistics, 9, 321-332") | |
| 1034 | |
| 1035 cit[4] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential", | |
| 1036 "expression analysis of multifactor RNA-Seq experiments with", | |
| 1037 "respect to biological variation. Nucleic Acids Research 40,", | |
| 1038 "4288-4297") | |
| 1039 | |
| 1040 cata("<h4>Citations</h4>") | |
| 1041 cata("<ol>\n") | |
| 1042 ListItem(cit[1]) | |
| 1043 ListItem(cit[2]) | |
| 1044 ListItem(cit[3]) | |
| 1045 ListItem(cit[4]) | |
| 1046 cata("</ol>\n") | |
| 1047 | |
| 3 | 1048 cata("<p>Report problems to: su.s@wehi.edu.au</p>\n") |
| 1049 | |
| 1050 for (i in 1:nrow(linkData)) { | |
| 1051 if (grepl("session_info", linkData$Link[i])) { | |
| 1052 HtmlLink(linkData$Link[i], linkData$Label[i]) | |
| 1053 } | |
| 1054 } | |
| 1055 | |
| 1 | 1056 cata("<table border=\"0\">\n") |
| 0 | 1057 cata("<tr>\n") |
| 1058 TableItem("Task started at:"); TableItem(timeStart) | |
| 1059 cata("</tr>\n") | |
| 1060 cata("<tr>\n") | |
| 1061 TableItem("Task ended at:"); TableItem(timeEnd) | |
| 1062 cata("</tr>\n") | |
| 1 | 1063 cata("<tr>\n") |
| 1064 TableItem("Task run time:"); TableItem(timeTaken) | |
| 1065 cata("<tr>\n") | |
| 1066 cata("</table>\n") | |
| 0 | 1067 |
| 1068 cata("</body>\n") | |
| 1069 cata("</html>") |
