# HG changeset patch
# User stef
# Date 1425476534 18000
# Node ID 81ba2f857fe259b3908a6645f6a3024e7c008f45
# Parent acf62630e4b5dfba7f72cb8d454fa2887a98bbbf
Uploaded
diff -r acf62630e4b5 -r 81ba2f857fe2 QDNAseq-plot.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-plot.R Wed Mar 04 08:42:14 2015 -0500
@@ -0,0 +1,51 @@
+#!/usr/bin/Rscript
+
+## --------------------
+## prints all arguments as msg
+## --------------------
+catMsg <- function( msg=c() ){
+ cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
+}
+
+
+## ==================================================
+## Start of analysis
+## ==================================================
+MAIN_NAME <- '[INFO] '
+catMsg( "Starting QDNAseq-plot wrapper" )
+catMsg( "Loading R libraries" )
+
+## supress msg to allow R to finish with non-error msg
+suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
+
+## only one param: the tmp config file
+cmdLineArgs <- commandArgs(TRUE)
+config <- cmdLineArgs[1]
+
+## sourcing the config file will load all input params
+## many variables are imported via sourced "config"
+source( config ) # outputPngPath, outputPdfPath, allOrOne, rdsFilePath
+#cat( "ALL? ", allOrOne, sep='' )
+
+## desparate tries to make png text scale well, damn you R...!
+PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3
+PAR_SET <- list( pch=22 )
+systemUser <- system("whoami",T)
+qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
+rVersion <- R.version.string
+catMsg( c("QDNAseq version: ", qdnaseqVersion) )
+catMsg( c( rVersion ) )
+
+qdnaseqObject <- readRDS( rdsFilePath )
+## COPYNUMBER PLOT
+sample <- SAMPLE_INDEX
+png( outputPngPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES )
+ par( PAR_SET )
+ plot( qdnaseqObject[ ,sample ] )
+ #mtext( "plotted in galaxy", 3 )
+ abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+dev.off()
+
+
+## all ok
+q(status=0)
diff -r acf62630e4b5 -r 81ba2f857fe2 QDNAseq-plot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-plot.xml Wed Mar 04 08:42:14 2015 -0500
@@ -0,0 +1,154 @@
+
+
+
+
+
+
+
+
+
+
+ Plot QDNAseq copy-number/segments/calls profile
+
+
+
+ QDNAseq-plot.R
+ $cfg
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ^[^\s\\]*$
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Desc: this file is sourced in QDNAseq-plot.R wrapper script
+## as means to pass all galaxy params to R
+
+## -----
+## required params
+## -----
+"${jobName}" -> outputName
+"${outputPng}" -> outputPngPath
+"${rdsFile}" -> rdsFilePath
+as.integer( "${sample_index}" ) -> SAMPLE_INDEX
+
+## -----
+## extra options
+## -----
+as.integer( "${advanced.plot_width}" ) -> PLOT_WIDTH
+as.integer( "${advanced.plot_height}" ) -> PLOT_HEIGHT
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Introduction**
+
+This tool is a wrapper for the plot function of the R Bioconductor package QDNAseq_
+
+.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+-----
+
+.. class:: warningmark
+
+As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq), the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the default R is not 3.1.0, also the wrapper xml must be updated to include the correct path during installation of this tool.
+
+-----
+
+**Citation**
+
+For the underlying QDNAseq R package please cite:
+Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114.
+
+See also the bioconductor package_ documentation.
+
+.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+.. image:: LGG150_copynumber.png
+.. image:: LGG150_copynumberSegmented.png
+
+
+
+
diff -r acf62630e4b5 -r 81ba2f857fe2 QDNAseq.R
--- a/QDNAseq.R Thu Nov 06 05:48:52 2014 -0500
+++ b/QDNAseq.R Wed Mar 04 08:42:14 2015 -0500
@@ -131,14 +131,21 @@
return(storeList)
}
+## ==================================================
+## Unused but potential usefull code
+## ==================================================
+#@ a bit hacky galaxy way to allow an unknown number of output files based on param selection
+#@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
+#historyName <- paste(binSize, 'kbp-IGV', sep="")
+#igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" )
## ==================================================
## Start of analysis
## ==================================================
MAIN_NAME <- '[INFO] '
catMsg( "Starting QDNAseq wrapper" )
+#catMsg( R.version.string )
catMsg( "Loading R libraries" )
-catMsg( R.version.string )
## supress msg to allow R to finish with non-error msg
suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
@@ -157,6 +164,9 @@
## many variables are imported via sourced "config"
source( config )
+## if calling requested we always need segmenting first as well
+if ( doCall ){ doSegment <- TRUE }
+
## desparate tries to make png text scale well, damn you R...!
PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3
PAR_SET <- list( pch=22 )
@@ -171,7 +181,15 @@
## get the comma separated list of chromosomes to exclude
excludeChrs <- unlist( strsplit( excludeChrsString, ",") )
-binSizes <- as.numeric( unlist( strsplit( binSizesString, ",") ) )
+
+## format binSizes back to integers because stupid galaxy doesn't do what I want
+#print( binSizesString )
+binSizes <- gsub( 'kb', '', binSizesString ) # remove the kb string to get integers
+#print( binSizes )
+binSizes <- gsub( 'bin', '', binSizes ) # remove the kb string to get integers
+#print( binSizes )
+binSizes <- as.numeric( unlist( strsplit( binSizes, ",") ) )
+#print( binSizes )
## ------------------------
@@ -205,11 +223,11 @@
plotted_images <- list() # to keep track of images for later linking
regions <- list() # will contain the segments
-outputFiles <- list()
## ------------------------
## in case of debug just use inbuilt LGG data for speedup
if ( debug ){
+ catMsg( c("Built in data only contains binsize 15kb so overriding chosen binSizes to single 15kb") )
binSizes <- c(15)
bamsPaths <- c( "BUILD_IN_DATA")
bamsNames <- c( "LGG150")
@@ -219,22 +237,23 @@
for ( binSize in binSizes ){
+ catMsg( c("Starting analysis for binSize: ", binSize) )
## ------------------------
## construct output file-names and -paths
## ------------------------
- robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='')
- robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='')
- igvCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='')
- robjSegmntName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='')
+ rdsReadName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='')
+ rdsCopyName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='')
+ rdsSegmName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='')
+ rdsCallName <- paste( binSize, 'kbp_QDNAseqCopyNumbersCalled.rds', sep='')
+ igvCopyName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='')
+ igvSegmName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.igv', sep='')
+ igvCallName <- paste( binSize, 'kbp_QDNAseqCopyNumbersCalled.igv', sep='')
+
regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='')
noiseImgName <- paste( binSize, 'kbp_QDNAseqNoiseplot.png', sep='')
- robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='')
- robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='')
- robjSegmntPath <- paste( outputPath, '/', robjSegmntName, sep='')
- robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='')
- igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='')
- noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='')
+ rdsRegiPath <- paste( outputPath, '/', regiOutputName, sep='')
+ noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='')
binAnnFile <- paste( TOOL_PATH, '/static/binannotation/', binSize, 'kbp_binAnnotations.rds', sep="" )
if ( file.exists(binAnnFile) ){
@@ -246,7 +265,7 @@
## in case of debug just use inbuilt LGG data for speedup
if ( debug ){
- data(LGG150)
+ data( LGG150 )
readCounts <- LGG150
}else{
## provide bamnames because in galaxy everyting is called "dataset_###"
@@ -260,27 +279,49 @@
copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized )
sampleNames <- readCountsFiltered@phenoData@data$name
- ## save objects to output dir
- saveRDS( readCountsFiltered, robjReadCoPath );
- saveRDS( copyNumbersSmooth, robjCopyNrPath );
- exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" )
+ ## set file to output if output requested
+ outputData <- copyNumbersSmooth
+ outputType <- 'copynumber'
+ outputLogT <- TRUE
+ rdsReadPath <- paste( outputPath, '/', rdsReadName, sep='')
+ saveRDS( readCounts, rdsReadPath );
+ rdsPath <- paste( outputPath, '/', rdsCopyName, sep='')
+ igvPath <- paste( outputPath, '/', igvCopyName, sep='')
+
+ ## proceed with segmenting / calling if requested
+ if ( doSegment ){
+ copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD, transformFun="sqrt" )
+ copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented )
+ outputData <- copyNumbersSegmented
+ outputType <- 'segments'
+ igvPath <- paste( outputPath, '/', rdsSegmName, sep='')
+ rdsPath <- paste( outputPath, '/', igvSegmName, sep='')
+ }
+ if ( doCall ){
+ copyNumbersCalled <- callBins(copyNumbersSegmented)
+ outputData <- copyNumbersCalled
+ outputType <- 'calls'
+ outputLogT <- FALSE # call values should not be transformed at output
+ rdsPath <- paste( outputPath, '/', rdsCallName, sep='')
+ igvPath <- paste( outputPath, '/', igvCallName, sep='')
+ }
+
+ ## save the QDNAseq objects and tsv file of highest level (calls or segments)
+ saveRDS( outputData, rdsPath );
+ exportBins( outputData, file=igvPath, format="igv", type=outputType, logTransform=outputLogT )
## also save objects for galaxy history output if requested
- if ( doOutputCopynumbersIgv ){
- #@ a bit hacky galaxy way to allow an unknown number of output files based on param selection
- #@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
- historyName <- paste(binSize, 'kbp-IGV', sep="")
- igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" )
- exportBins( copyNumbersSmooth, file=igvFile, format="igv" )
- catMsg( c("Exported igv file to history for ", binSize, "kbp bin") )
+ if ( txt2history ){
+ fileId <- paste('txt_', binSize, sep='')
+ historyOutputPath <- historyOutputFiles[[ fileId ]]
+ catMsg( c("About to export igv/txt file to history for ", binSize, "kbp bin") )
+ exportBins( outputData, file=historyOutputPath, format="igv", type=outputType, logTransform=outputLogT )
}
-
- ## proceed with calling if requested
- if ( doSegment ){
- copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD )
- copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented )
- cgh <- makeCgh( copyNumbersSegmented )
- saveRDS( copyNumbersSegmented, robjSegmntPath );
+ if ( rds2history ){
+ fileId <- paste('rds_', binSize, sep='')
+ rdsHistoryOutputPath <- historyOutputFiles[[ fileId ]]
+ catMsg( c("About to export rds file to history for ", binSize, "kbp bin") )
+ saveRDS( outputData, file=rdsHistoryOutputPath )
}
## ------------------------
@@ -293,6 +334,7 @@
binSize <- as.character( binSize ) # to avoid R using it as array index... (*#$^@ you R!)
binSizeString <- paste( binSize, 'kbp', sep='')
+ cgh <- makeCgh( outputData ) # needed for fuseRegions function
for (i in 1:length(sampleNames) ){
@@ -308,7 +350,7 @@
png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES );
par( PAR_SET )
plot( copyNumbersSmooth[ ,sample ], main=paste(sample, ": CopyNumbers", sep="") )
- mtext( paste( binSizeString, " bins", sep=""), 3 )
+ mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
dev.off()
@@ -319,16 +361,17 @@
img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='')
img_file_path <- paste( outputPath, '/', img_file, sep='' )
- ## COPYNUMBER PLOT
+ ## COPYNUMBER + SEGMENTS PLOT
png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES );
par( PAR_SET )
- plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers and Segments", sep="") )
+ plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers (Segmented)", sep="") )
mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
dev.off()
plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
+ ## if segmented we can also retrieve the segment locations
catMsg( c(" Fusing regions of sample: ", sample) )
regions[[ sample ]] <- fuseRegions( cgh[, sample] )
@@ -336,13 +379,29 @@
catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) )
plotted_images[[ paste(binSize, sample, 'region_count', sep="_" ) ]] <- region_count
}
+
+ if ( doCall ){
+ type <- 'Called'
+ img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='')
+ img_file_path <- paste( outputPath, '/', img_file, sep='' )
+
+ ## COPYNUMBER + SEGMENTS + CALLS PLOT
+ png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES );
+ par( PAR_SET )
+ plot( copyNumbersCalled[ ,sample ], main=paste(sample, ": CopyNumbers (Segmented and Called)", sep="") )
+ mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
+ abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+ dev.off()
+
+ plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
+ }
## add USED read counts
plotted_images[[ paste(binSize, sample, 'usedReads', sep="_" ) ]] <- usedReads
}
if ( doSegment ){
- saveRDS( regions, robjRegionPath )
+ saveRDS( regions, rdsRegiPath )
plotted_images <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath, binSize=binSize, storeList=plotted_images )
}
}# end bin
@@ -418,13 +477,20 @@
## list with links to all output files
## ------------------------
cat( '
Output files
', "\n")
- cat( '
This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files
', "\n", sep='')
+ cat( '
This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files.
This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is a more exprimental step).
', "\n", sep='')
+ cat( '
This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is, together with optional calling afterwards, a more experimental type of analysis).