2
|
1 #!/usr/bin/Rscript
|
|
2
|
|
3 ## --------------------
|
|
4 ## prints all arguments as msg
|
|
5 ## --------------------
|
|
6 catMsg <- function( msg=c() ){
|
|
7 cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
|
|
8 }
|
|
9 ## --------------------
|
28
|
10 ## return the location of this script
|
2
|
11 ## --------------------
|
|
12 getScriptPath <- function(){
|
|
13 cmd.args <- commandArgs()
|
|
14 m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE)
|
|
15 script.dir <- dirname(regmatches(cmd.args, m))
|
42
|
16 if( length(script.dir) == 0 ) stop("[ERR] Can't determine script dir: please call the script with Rscript\n")
|
|
17 if( length(script.dir) > 1 ) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n")
|
2
|
18 return(script.dir)
|
|
19 }
|
|
20 ## --------------------
|
28
|
21 ## Some html creation functions
|
2
|
22 ## --------------------
|
|
23 htmlTableRow <- function( string_array=c() ){
|
|
24 td_cells <- ''
|
|
25 for ( i in string_array ){
|
|
26 td_cells <- paste( td_cells, '<td>', i, '</td>', sep='' )
|
|
27 }
|
|
28 return( paste( "<tr>", td_cells, "</tr>") )
|
|
29 }
|
|
30 htmlLink <- function( path, desc="LINK" ){
|
|
31 return( paste( '<a href="', path, '">', desc, "</a>", sep='') )
|
|
32 }
|
|
33 ## --------------------
|
28
|
34 ## constructs a list with input bam file info
|
2
|
35 ## --------------------
|
|
36 makeBamFileList <- function( paths, names ){
|
|
37 tmp <- list()
|
|
38 l1 <- length(paths)
|
|
39 l2 <- length(names)
|
42
|
40 if ( l1 != l2 ) stop( "Unequal amount of bam-paths (", l1, ") and -names (", l2, ") in makeBamFileList!!!\n" )
|
72
|
41 if ( l1 == 0 ){ return(tmp) } # empty list in debug mode
|
|
42
|
2
|
43 for ( i in 1:length(paths) ){
|
|
44 path <- paths[i]
|
|
45 name <- names[i]
|
|
46 file <- basename(path)
|
|
47
|
|
48 tmp[[ file ]] <- name
|
|
49 tmp[[ 'all_paths' ]] <- c( tmp[[ 'all_paths' ]], path )
|
|
50 tmp[[ 'all_files' ]] <- c( tmp[[ 'all_files' ]], file )
|
|
51 tmp[[ 'all_names' ]] <- c( tmp[[ 'all_names' ]], name )
|
|
52 }
|
|
53 return( tmp )
|
|
54 }
|
|
55
|
|
56 ## --------------------
|
28
|
57 ## copied code for extracting the regions by segment call status
|
2
|
58 ## --------------------
|
64
|
59 fuseRegions <- function( obj, minRatio=0 ) {
|
|
60 if ( ncol(obj) > 1 ) stop('Please specify which sample...')
|
2
|
61
|
64
|
62 data <- data.frame( obj@featureData@data[,1:3], copynumber(obj), segmented(obj), check.names=FALSE, stringsAsFactors=FALSE)
|
|
63 colnames( data ) <- c( "chr", "start", "end", "log2", "segmentval" )
|
2
|
64
|
|
65 fused.data <- data.frame()
|
|
66 curr.bin <- 1
|
59
|
67 for ( chr in unique( data$chr ) ) {
|
|
68 chr.data <- data[ data$chr == chr, ]
|
|
69 prev.bin <- curr.bin
|
|
70 prev.log2 <- chr.data[ 1, 'log2' ]
|
|
71 prev.segm <- chr.data[ 1, 'segmentval' ]
|
|
72 start <- chr.data[ 1, 'start' ]
|
2
|
73
|
|
74 if ( nrow(chr.data) > 1) {
|
|
75 for ( i in 2:nrow(chr.data) ) {
|
59
|
76 curr.bin <- curr.bin + 1
|
2
|
77 curr.segm <- chr.data[ i, 'segmentval']
|
|
78
|
|
79 if ( curr.segm != prev.segm ) {
|
64
|
80 fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
|
2
|
81 prev.segm <- curr.segm
|
59
|
82 prev.bin <- curr.bin
|
|
83 start <- chr.data[ i, 'start']
|
2
|
84 }
|
|
85 }
|
64
|
86 fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
|
59
|
87 }else{
|
64
|
88 fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
|
2
|
89 }
|
|
90 }
|
64
|
91 ## remove regions with low amplitude
|
|
92 fused.data <- fused.data[ abs(fused.data$segmentval) >= minRatio, ]
|
2
|
93 fused.data
|
|
94 }
|
|
95
|
|
96 ## DESC: takes the output of fuse.regions and outputs a txt file per sample
|
64
|
97 outputRegionsFromList <- function ( regionsList, outputBasename, outputDir="./", binSize, storeList ){
|
2
|
98 if ( missing(regionsList) ) stop( 'Please provide regionsList...' )
|
|
99 if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' )
|
|
100 if ( !is.list(regionsList) ) stop( 'Input not a list...?' )
|
|
101 if ( length(regionsList) < 1 ) stop( 'List seems empty...?' )
|
42
|
102 if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") )
|
2
|
103 else dir.create( outputDir )
|
|
104
|
|
105 ## have to set R output options otherwise scientific method is used at some point
|
|
106 options( "scipen"=100 )
|
|
107
|
|
108 sampleCount <- length( regionsList )
|
|
109 sampleNames <- names( regionsList )
|
|
110 bedgraphColumns <- c( 'chr', 'start', 'end', 'segmentval' )
|
30
|
111
|
42
|
112 catMsg( c( " There are ", sampleCount, " samples found in input list") )
|
2
|
113
|
|
114 for ( sample in sampleNames ){
|
42
|
115 catMsg( c(" Working on sample ", sample ) )
|
2
|
116 regionCount <- nrow( regionsList[[sample]] )
|
|
117
|
64
|
118 outSampleBase <- paste( outputBasename, '_', sample, '_', binSize, 'kbp', sep='')
|
2
|
119 outBedgraphFile <- paste( outSampleBase, '.bedGraph', sep="" )
|
|
120 outBedgraphPath <- paste( outputDir, '/', outBedgraphFile, sep="" )
|
|
121
|
|
122 ## ---------- BEDGRAPH ----------
|
64
|
123 txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample," description=segmented_regions_from_QDNAseq_",binSize,"kbp\n", sep="")
|
2
|
124 sink( outBedgraphPath )
|
|
125 cat( txt )
|
|
126 sink()
|
|
127 write.table( regionsList[[sample]][,bedgraphColumns], outBedgraphPath, quote=F, sep="\t", row.names=F, append=T, col.names=F)
|
64
|
128 #outFiles[[sample]] <- c( outBedgraphFile )
|
|
129 storeList[[ paste( binSize, sample, 'bedgraph', sep="_")]] <- outBedgraphFile
|
2
|
130 }
|
64
|
131 return(storeList)
|
2
|
132 }
|
|
133
|
78
|
134 ## ==================================================
|
|
135 ## Unused but potential usefull code
|
|
136 ## ==================================================
|
|
137 #@ a bit hacky galaxy way to allow an unknown number of output files based on param selection
|
|
138 #@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
|
|
139 #historyName <- paste(binSize, 'kbp-IGV', sep="")
|
|
140 #igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" )
|
64
|
141
|
2
|
142 ## ==================================================
|
|
143 ## Start of analysis
|
|
144 ## ==================================================
|
42
|
145 MAIN_NAME <- '[INFO] '
|
2
|
146 catMsg( "Starting QDNAseq wrapper" )
|
78
|
147 #catMsg( R.version.string )
|
2
|
148 catMsg( "Loading R libraries" )
|
73
|
149
|
74
|
150 ## supress msg to allow R to finish with non-error msg
|
2
|
151 suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
|
|
152 suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
|
25
|
153
|
2
|
154 ## only one param: the tmp config file
|
|
155 cmdLineArgs <- commandArgs(TRUE)
|
|
156 config <- cmdLineArgs[1]
|
74
|
157 TOOL_PATH <- cmdLineArgs[2]
|
|
158 CSS_FILE <- paste( TOOL_PATH, '/static/css/QDNAseq.css', sep="" )
|
|
159 DECIMALS <- 3
|
|
160 WEB_LINK <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html'
|
|
161 PURE_CSS <- 'http://yui.yahooapis.com/pure/0.5.0/pure-min.css'
|
2
|
162
|
|
163 ## sourcing the config file will load all input params
|
59
|
164 ## many variables are imported via sourced "config"
|
2
|
165 source( config )
|
|
166
|
78
|
167 ## if calling requested we always need segmenting first as well
|
|
168 if ( doCall ){ doSegment <- TRUE }
|
|
169
|
64
|
170 ## desparate tries to make png text scale well, damn you R...!
|
|
171 PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3
|
|
172 PAR_SET <- list( pch=22 )
|
2
|
173
|
42
|
174 systemUser <- system("whoami",T)
|
|
175 qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
|
59
|
176 rVersion <- R.version.string
|
64
|
177 startTime <- Sys.time()
|
|
178 analysisStart <- as.character( startTime )
|
|
179 catMsg( c("QDNAseq version: ", qdnaseqVersion) )
|
59
|
180 catMsg( c( rVersion ) )
|
42
|
181
|
2
|
182 ## get the comma separated list of chromosomes to exclude
|
|
183 excludeChrs <- unlist( strsplit( excludeChrsString, ",") )
|
78
|
184
|
|
185 ## format binSizes back to integers because stupid galaxy doesn't do what I want
|
|
186 #print( binSizesString )
|
|
187 binSizes <- gsub( 'kb', '', binSizesString ) # remove the kb string to get integers
|
|
188 #print( binSizes )
|
|
189 binSizes <- gsub( 'bin', '', binSizes ) # remove the kb string to get integers
|
|
190 #print( binSizes )
|
|
191 binSizes <- as.numeric( unlist( strsplit( binSizes, ",") ) )
|
|
192 #print( binSizes )
|
64
|
193
|
2
|
194
|
|
195 ## ------------------------
|
|
196 ## DEBUG
|
64
|
197 if ( debug ){
|
72
|
198 catMsg( c("Analysis run by user: ", systemUser ) )
|
|
199 catMsg( c("DEBUG SessionInfo: " ) )
|
64
|
200 print( sessionInfo() )
|
|
201 }
|
2
|
202 ## /DEBUG
|
|
203 ## ------------------------
|
|
204
|
64
|
205 ## prepare output dir
|
2
|
206 if ( !file.exists( outputPath) ){
|
|
207 dir.create( outputPath )
|
|
208 }
|
|
209
|
|
210 ## copy source config file to output dir to include it in output zip
|
72
|
211 if ( inGalaxy ){
|
|
212 file.copy( config, paste(outputPath, 'galaxyConfigFile.R', sep='/') )
|
|
213 }
|
2
|
214
|
59
|
215 ## setup bam filelist for easy retrieval later
|
|
216 fileList <- makeBamFileList( bamsPaths, bamsNames )
|
|
217 bamCount <- length( fileList[[ 'all_paths' ]] )
|
2
|
218
|
64
|
219 gzipOutputName <- paste( 'QDNAseqResults_', outputName, '.zip', sep='' )
|
|
220 gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='')
|
|
221 htmlOutputName <- 'index.html'
|
|
222 htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='')
|
|
223
|
|
224 plotted_images <- list() # to keep track of images for later linking
|
|
225 regions <- list() # will contain the segments
|
|
226
|
2
|
227 ## ------------------------
|
64
|
228 ## in case of debug just use inbuilt LGG data for speedup
|
|
229 if ( debug ){
|
78
|
230 catMsg( c("Built in data only contains binsize 15kb so overriding chosen binSizes to single 15kb") )
|
64
|
231 binSizes <- c(15)
|
|
232 bamsPaths <- c( "BUILD_IN_DATA")
|
59
|
233 bamsNames <- c( "LGG150")
|
|
234 fileList <- makeBamFileList( bamsPaths, bamsNames )
|
|
235 bamCount <- length( fileList[[ 'all_paths' ]] )
|
2
|
236 }
|
|
237
|
64
|
238 for ( binSize in binSizes ){
|
|
239
|
78
|
240 catMsg( c("Starting analysis for binSize: ", binSize) )
|
64
|
241 ## ------------------------
|
|
242 ## construct output file-names and -paths
|
|
243 ## ------------------------
|
78
|
244 rdsReadName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='')
|
|
245 rdsCopyName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='')
|
|
246 rdsSegmName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='')
|
|
247 rdsCallName <- paste( binSize, 'kbp_QDNAseqCopyNumbersCalled.rds', sep='')
|
|
248 igvCopyName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='')
|
|
249 igvSegmName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.igv', sep='')
|
|
250 igvCallName <- paste( binSize, 'kbp_QDNAseqCopyNumbersCalled.igv', sep='')
|
|
251
|
64
|
252 regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='')
|
|
253 noiseImgName <- paste( binSize, 'kbp_QDNAseqNoiseplot.png', sep='')
|
|
254
|
78
|
255 rdsRegiPath <- paste( outputPath, '/', regiOutputName, sep='')
|
|
256 noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='')
|
64
|
257
|
72
|
258 binAnnFile <- paste( TOOL_PATH, '/static/binannotation/', binSize, 'kbp_binAnnotations.rds', sep="" )
|
|
259 if ( file.exists(binAnnFile) ){
|
|
260 binAnnotations <- readRDS( binAnnFile )
|
|
261 catMsg( c("Using local binAnnotations file" ) )
|
|
262 }else{
|
|
263 binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType )
|
|
264 }
|
64
|
265
|
|
266 ## in case of debug just use inbuilt LGG data for speedup
|
|
267 if ( debug ){
|
78
|
268 data( LGG150 )
|
64
|
269 readCounts <- LGG150
|
|
270 }else{
|
|
271 ## provide bamnames because in galaxy everyting is called "dataset_###"
|
|
272 readCounts <- binReadCounts( binAnnotations, bamfiles=fileList[[ 'all_paths' ]], bamnames=fileList[[ 'all_names' ]] )
|
|
273 }
|
2
|
274
|
64
|
275 readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs )
|
|
276 readCountsFiltered <- estimateCorrection( readCountsFiltered )
|
|
277 copyNumbers <- correctBins( readCountsFiltered )
|
|
278 copyNumbersNormalized <- normalizeBins( copyNumbers )
|
|
279 copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized )
|
|
280 sampleNames <- readCountsFiltered@phenoData@data$name
|
|
281
|
78
|
282 ## set file to output if output requested
|
|
283 outputData <- copyNumbersSmooth
|
|
284 outputType <- 'copynumber'
|
|
285 outputLogT <- TRUE
|
|
286 rdsReadPath <- paste( outputPath, '/', rdsReadName, sep='')
|
|
287 saveRDS( readCounts, rdsReadPath );
|
|
288 rdsPath <- paste( outputPath, '/', rdsCopyName, sep='')
|
|
289 igvPath <- paste( outputPath, '/', igvCopyName, sep='')
|
|
290
|
|
291 ## proceed with segmenting / calling if requested
|
|
292 if ( doSegment ){
|
79
|
293 copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD, transformFun=c("sqrt") )
|
78
|
294 copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented )
|
|
295 outputData <- copyNumbersSegmented
|
|
296 outputType <- 'segments'
|
|
297 igvPath <- paste( outputPath, '/', rdsSegmName, sep='')
|
|
298 rdsPath <- paste( outputPath, '/', igvSegmName, sep='')
|
|
299 }
|
|
300 if ( doCall ){
|
|
301 copyNumbersCalled <- callBins(copyNumbersSegmented)
|
|
302 outputData <- copyNumbersCalled
|
|
303 outputType <- 'calls'
|
|
304 outputLogT <- FALSE # call values should not be transformed at output
|
|
305 rdsPath <- paste( outputPath, '/', rdsCallName, sep='')
|
|
306 igvPath <- paste( outputPath, '/', igvCallName, sep='')
|
|
307 }
|
|
308
|
|
309 ## save the QDNAseq objects and tsv file of highest level (calls or segments)
|
|
310 saveRDS( outputData, rdsPath );
|
|
311 exportBins( outputData, file=igvPath, format="igv", type=outputType, logTransform=outputLogT )
|
64
|
312
|
|
313 ## also save objects for galaxy history output if requested
|
78
|
314 if ( txt2history ){
|
|
315 fileId <- paste('txt_', binSize, sep='')
|
|
316 historyOutputPath <- historyOutputFiles[[ fileId ]]
|
|
317 catMsg( c("About to export igv/txt file to history for ", binSize, "kbp bin") )
|
|
318 exportBins( outputData, file=historyOutputPath, format="igv", type=outputType, logTransform=outputLogT )
|
64
|
319 }
|
78
|
320 if ( rds2history ){
|
|
321 fileId <- paste('rds_', binSize, sep='')
|
|
322 rdsHistoryOutputPath <- historyOutputFiles[[ fileId ]]
|
|
323 catMsg( c("About to export rds file to history for ", binSize, "kbp bin") )
|
|
324 saveRDS( outputData, file=rdsHistoryOutputPath )
|
64
|
325 }
|
2
|
326
|
64
|
327 ## ------------------------
|
|
328 ## create output files
|
|
329 ## ------------------------
|
|
330 png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES );
|
|
331 par( PAR_SET )
|
|
332 noisePlot( readCountsFiltered, main=paste( "Noise Plot ", binSize, "kbp", sep=''), col="darkgreen" )
|
|
333 dev.off()
|
|
334
|
|
335 binSize <- as.character( binSize ) # to avoid R using it as array index... (*#$^@ you R!)
|
|
336 binSizeString <- paste( binSize, 'kbp', sep='')
|
78
|
337 cgh <- makeCgh( outputData ) # needed for fuseRegions function
|
64
|
338
|
|
339 for (i in 1:length(sampleNames) ){
|
|
340
|
|
341 sample <- sampleNames[i]
|
|
342 usedReads <- readCountsFiltered@phenoData@data$used.reads[i]
|
|
343 catMsg( c("Creating plots for sample: ", sample, " (", binSizeString, ")" ) )
|
|
344
|
|
345 type <- 'CopyNumbers'
|
|
346 img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='')
|
|
347 img_file_path <- paste( outputPath, '/', img_file, sep='' )
|
2
|
348
|
64
|
349 ## COPYNUMBER PLOT
|
|
350 png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES );
|
|
351 par( PAR_SET )
|
68
|
352 plot( copyNumbersSmooth[ ,sample ], main=paste(sample, ": CopyNumbers", sep="") )
|
78
|
353 mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
|
64
|
354 abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
|
|
355 dev.off()
|
|
356
|
|
357 plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
|
|
358
|
|
359 if ( doSegment ){
|
|
360 type <- 'Segmented'
|
|
361 img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='')
|
|
362 img_file_path <- paste( outputPath, '/', img_file, sep='' )
|
|
363
|
78
|
364 ## COPYNUMBER + SEGMENTS PLOT
|
64
|
365 png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES );
|
|
366 par( PAR_SET )
|
78
|
367 plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers (Segmented)", sep="") )
|
69
|
368 mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
|
64
|
369 abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
|
|
370 dev.off()
|
|
371
|
|
372 plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
|
|
373
|
78
|
374 ## if segmented we can also retrieve the segment locations
|
64
|
375 catMsg( c(" Fusing regions of sample: ", sample) )
|
|
376 regions[[ sample ]] <- fuseRegions( cgh[, sample] )
|
|
377
|
|
378 region_count <- nrow( data.frame( regions[[ sample ]] ) )
|
|
379 catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) )
|
|
380 plotted_images[[ paste(binSize, sample, 'region_count', sep="_" ) ]] <- region_count
|
|
381 }
|
78
|
382
|
|
383 if ( doCall ){
|
|
384 type <- 'Called'
|
|
385 img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='')
|
|
386 img_file_path <- paste( outputPath, '/', img_file, sep='' )
|
|
387
|
|
388 ## COPYNUMBER + SEGMENTS + CALLS PLOT
|
|
389 png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES );
|
|
390 par( PAR_SET )
|
|
391 plot( copyNumbersCalled[ ,sample ], main=paste(sample, ": CopyNumbers (Segmented and Called)", sep="") )
|
|
392 mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
|
|
393 abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
|
|
394 dev.off()
|
|
395
|
|
396 plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
|
|
397 }
|
64
|
398
|
|
399 ## add USED read counts
|
|
400 plotted_images[[ paste(binSize, sample, 'usedReads', sep="_" ) ]] <- usedReads
|
2
|
401 }
|
|
402
|
64
|
403 if ( doSegment ){
|
78
|
404 saveRDS( regions, rdsRegiPath )
|
64
|
405 plotted_images <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath, binSize=binSize, storeList=plotted_images )
|
|
406 }
|
|
407 }# end bin
|
28
|
408
|
2
|
409
|
64
|
410 ## ----- debug -----
|
|
411 #catMsg( "done" )
|
|
412 #q(status=0)
|
|
413 ## ---- /debug -----
|
2
|
414
|
|
415
|
|
416 ## ------------------------
|
|
417 ## prepare output
|
|
418 ## ------------------------
|
42
|
419 catMsg( "...zipping output")
|
2
|
420 zip_cmd <- paste( "zip -j", gzipOutputPath, paste(outputPath,'/*',sep='') ) ## -j is for removing dirs from the tree
|
|
421 system( zip_cmd )
|
|
422
|
|
423 ## ------------------------
|
|
424 ## get filesizes for report
|
|
425 ## ------------------------
|
64
|
426 zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' )
|
|
427 endTime <- Sys.time()
|
|
428 timeDiff <- format( round( endTime - startTime, 3 ) )
|
|
429 analysisEnd <- as.character( endTime )
|
2
|
430
|
|
431 ## ------------------------
|
|
432 ## creating html output to be linked to from the middle galaxy pane
|
|
433 ## ------------------------
|
|
434 sink( file = htmlOutputPath, type = "output" )
|
|
435 cat( "<html>\n")
|
|
436 cat( "<head>\n")
|
64
|
437
|
59
|
438 cat( "\t", '<title>QDNAseq Report | ', outputName,'</title>', "\n", sep='' )
|
|
439 cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' )
|
|
440 cat( "\t<style>\n", sep='')
|
|
441 ## include CSS into html file, makes it more portable
|
|
442 cat( "\t\t", readLines( CSS_FILE ), sep="\n\t\t" )
|
2
|
443 #cat( "\t\th1 {color:red;}", "\n")
|
59
|
444 cat( "\n\t</style>\n" )
|
2
|
445
|
|
446 cat( "\n</head>\n")
|
|
447 cat( "\n<body>\n")
|
|
448
|
|
449 cat( "<h1>QDNAseq Report</h1>", "\n")
|
|
450
|
|
451 cat( '<h3 class="qdnaseq">About this analysis</h3>', "\n")
|
59
|
452 cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='')
|
2
|
453
|
|
454 ## ------------------------
|
|
455 ## table with general info
|
|
456 ## ------------------------
|
|
457 cat( '<h3 class="qdnaseq">Settings</h3><p>', "\n")
|
59
|
458 cat( '<table class="pure-table pure-table-striped"><tbody>' )
|
2
|
459 cat( htmlTableRow( c( "AnalysisName", outputName ) ) )
|
64
|
460 cat( htmlTableRow( c( "AnalysisStart", analysisStart ) ) )
|
|
461 cat( htmlTableRow( c( "AnalysisEnd", analysisEnd ) ) )
|
|
462 cat( htmlTableRow( c( "AnalysisTime", timeDiff ) ) )
|
|
463 cat( htmlTableRow( c( "BinSizes (kbp)", paste(binSizes,collapse=", ") ) ) )
|
59
|
464 cat( htmlTableRow( c( "R info", rVersion ) ) )
|
|
465 cat( htmlTableRow( c( "QDNAseq info", qdnaseqVersion ) ) )
|
2
|
466
|
59
|
467 sampleStrings <- c()
|
2
|
468 for ( galaxyName in fileList[[ 'all_files' ]] ){
|
|
469 sampleName <- fileList[[ galaxyName ]]
|
59
|
470 sampleStrings <- c( sampleStrings, paste( galaxyName, ' (', sampleName, ')', sep='' ) )
|
2
|
471 }
|
59
|
472 cat( htmlTableRow( c( "InputBams", paste( sampleStrings, collapse=", ") ) ) )
|
|
473
|
2
|
474 cat( "</tbody></table></p>", "\n")
|
|
475
|
|
476 ## ------------------------
|
|
477 ## list with links to all output files
|
|
478 ## ------------------------
|
|
479 cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n")
|
78
|
480 cat( '<p>This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files.</p>', "\n", sep='')
|
64
|
481 cat( '<table class="pure-table pure-table-striped">', "\n" )
|
|
482 cat( '<thead><th>Type</th>', as.vector( mapply( paste, "<th>", binSizes, "kbp</th>", sep="" ) ),'</thead>', "\n" )
|
|
483 cat( "<tbody>", "\n")
|
|
484 files <- list()
|
78
|
485 #fileTypes <- c( 'ReadCounts.rds', 'CopyNumbers.rds' )
|
|
486 fileTypes <- c( 'ReadCounts.rds' )
|
|
487 if ( doCall ){
|
|
488 fileTypes <- c( fileTypes, 'CopyNumbersCalled.rds')
|
|
489 }else if ( doSegment ){
|
|
490 fileTypes <- c( fileTypes, 'CopyNumbersSegmented.rds')
|
|
491 }else {
|
|
492 fileTypes <- c( fileTypes, 'CopyNumbers.rds')
|
|
493 }
|
2
|
494
|
64
|
495 for ( fileType in fileTypes ){
|
|
496 fileNames <- mapply( paste, binSizes, paste( 'kbp_QDNAseq', fileType, sep=''), sep='')
|
|
497 fileLinks <- mapply( htmlLink, fileNames, paste( binSizes, "kbp", sep="" ) )
|
|
498 cat( htmlTableRow( c( fileType, fileLinks ) ) )
|
|
499 }
|
|
500 cat( "\n</tbody></table></p>", "\n")
|
2
|
501
|
|
502 ## ------------------------
|
|
503 ## table with links to files
|
|
504 ## ------------------------
|
64
|
505 ratio <- PLOT_WIDTH / PLOT_HEIGHT
|
|
506 width <- 960; height <- width / ratio ## bigger img
|
|
507 width_t <- 100; height_t <- 40 ## thumb img
|
|
508
|
2
|
509 cat( '<h3 class="qdnaseq">Results: overview</h3><p>', "\n")
|
78
|
510 cat( '<p>This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is, together with optional calling afterwards, a more experimental type of analysis).</p>', "\n", sep='')
|
2
|
511 plots_html <- ''
|
64
|
512
|
|
513 colspan <- 1
|
|
514 binHeader <- "<th>Image</th>"
|
|
515 if ( doSegment ){ # extra column with segment info
|
|
516 colspan <- 2
|
|
517 binHeader <- "<th>Image</th><th>Segments</th>"
|
|
518 }
|
|
519 cat( '<table class="pure-table pure-table-striped">', "\n" )
|
|
520 cat( '<thead><tr><th></th><th></th>', as.vector( mapply( paste, "<th colspan=\"", colspan,"\">", binSizes, "kbp</th>", sep="" ) ), '</tr></thead>' )
|
|
521 cat( '<thead><tr><th>Sample / File</th><th>Reads</th>', rep( binHeader, length(binSizes) ), '</tr></thead>' )
|
|
522 cat( '<tbody>' )
|
|
523
|
|
524 for ( bam_file in bamsNames ){
|
2
|
525
|
64
|
526 usedReads <- plotted_images[[ paste(binSize, bam_file, 'usedReads', sep="_" ) ]]
|
2
|
527 usedReads <- format( as.integer(usedReads), digits=4, decimal.mark=".", big.mark="," )
|
64
|
528 htmlRow <- paste( '<tr><td>', bam_file, '</td><td>', usedReads, '</td>', sep='' )
|
2
|
529
|
64
|
530 for ( binSize in binSizes ){
|
|
531
|
|
532 ## add thumbnails to table with links to anchors on html page
|
|
533 copy_img <- plotted_images[[ paste(binSize, bam_file, 'CopyNumbers', sep="_" ) ]]
|
|
534 html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('<img src="',copy_img,'" alt="', bam_file, '" width="', width_t, '" height="', height_t, '">', sep='') )
|
|
535 html_copy_img <- htmlLink( path=copy_img, paste('<img id="', copy_img,'" src="',copy_img,'" alt="',bam_file, '" width="', width, '" height="', height, '">', sep='') )
|
|
536 html_segm_img <- ''
|
78
|
537 html_call_img <- ''
|
64
|
538 html_bedGraph <- ''
|
|
539 region_count <- ''
|
|
540 htmlRow <- paste( htmlRow, '<td>', html_copy_thumb, '</td>' )
|
2
|
541
|
64
|
542 if ( doSegment ){
|
|
543 segm_img <- plotted_images[[ paste(binSize, bam_file, 'Segmented', sep="_" ) ]]
|
|
544 region_count <- plotted_images[[ paste(binSize, bam_file, 'region_count', sep="_" ) ]]
|
2
|
545
|
64
|
546 html_bedGraph <- htmlLink( path=plotted_images[[ paste(binSize, bam_file, 'bedgraph', sep="_" ) ]], 'bedGraph' )
|
|
547 html_segm_img <- htmlLink( path=segm_img, paste('<img id="', segm_img,'" src="', segm_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') )
|
|
548 htmlRow <- paste( htmlRow, '<td>', region_count, ' (', html_bedGraph, ')</td>', sep="" )
|
|
549 }
|
78
|
550 if ( doCall ){
|
|
551 call_img <- plotted_images[[ paste(binSize, bam_file, 'Called', sep="_" ) ]]
|
|
552 html_call_img <- htmlLink( path=call_img, paste('<img id="', call_img,'" src="', call_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') )
|
|
553 }
|
|
554 plots_html <- paste( plots_html, html_copy_img, "\n", html_segm_img, "\n", html_call_img, "\n<br \\>\n", sep='' )
|
2
|
555 }
|
64
|
556 plots_html <- paste( plots_html, "\n<hr \\>\n", sep='' )
|
2
|
557 ## add info to overview table, including small thumbnails
|
64
|
558 htmlRow <- paste( htmlRow, '</tr>', sep='' )
|
|
559 cat( htmlRow, "\n" )
|
2
|
560 }
|
|
561 cat( "</tbody></table></p>", "\n")
|
|
562
|
|
563 ## ------------------------
|
|
564 ## section with various output shown
|
|
565 ## ------------------------
|
42
|
566 cat( '<h3 class="qdnaseq">Results: Sample plots</h3><p>', "\n")
|
64
|
567 ## now include (large) images in html page
|
2
|
568 cat( plots_html, "\n")
|
|
569 cat( "\n</p></body>\n")
|
|
570 cat( "\n</html>\n")
|
|
571 sink()
|
|
572
|
|
573 ## ------------------------
|
42
|
574 ## creating main html output for galaxy history
|
2
|
575 ## ------------------------
|
72
|
576 if ( inGalaxy ){ # dont create when running outside Galaxy
|
|
577 sink( file = outputHtml, type = "output" )
|
|
578
|
2
|
579 cat( "<head>", "\n")
|
59
|
580 cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' )
|
2
|
581
|
|
582 cat( "<style>", "\n")
|
42
|
583 ## include CSS directly into html file
|
2
|
584 cat( paste( "\t", '/* the css here originates from ', CSS_FILE,' */', "\n") )
|
|
585 cat( paste( "\t", readLines( CSS_FILE, n = -1)), sep="\n" )
|
|
586 cat( "</style>", "\n")
|
|
587 cat( "</head>", "\n")
|
|
588
|
|
589 cat( '<h1>QDNAseq Results (', outputName,')</h1>', "\n", sep="")
|
59
|
590 cat( '<p>Explore <a href="', htmlOutputName, '" class="button">the results</a> directly within galaxy</p>', "\n", sep="")
|
|
591 cat( '<p>Or download a <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" )
|
2
|
592
|
72
|
593 sink()
|
|
594 }
|
2
|
595
|
42
|
596 ## ------------------------
|
|
597 ## create final zip and quit with status 0 to tell galaxy all was fine
|
|
598 ## ------------------------
|
|
599 catMsg( "zipping all output")
|
|
600 system( paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) )
|
|
601 catMsg( "done" )
|
25
|
602 q(status=0)
|