changeset 42:4351c7715275 draft

Uploaded
author stef
date Thu, 19 Jun 2014 12:35:36 -0400
parents 2489b20dc7ab
children 327b8830d49f
files QDNAseq.R
diffstat 1 files changed, 77 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/QDNAseq.R	Thu Jun 19 12:35:21 2014 -0400
+++ b/QDNAseq.R	Thu Jun 19 12:35:36 2014 -0400
@@ -13,8 +13,8 @@
     cmd.args <- commandArgs()
     m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE)
     script.dir <- dirname(regmatches(cmd.args, m))
-    if(length(script.dir) == 0) stop("[ERR] Can't determine script dir: please call the script with Rscript\n")
-    if(length(script.dir) > 1) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n")
+    if( length(script.dir) == 0 ) stop("[ERR] Can't determine script dir: please call the script with Rscript\n")
+    if( length(script.dir) > 1 ) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n")
     return(script.dir)
 }
 ## --------------------
@@ -37,7 +37,7 @@
 	tmp <- list()
 	l1 <- length(paths)
 	l2 <- length(names)
-	if ( l1 != l2 ) stop( "Unequal amount of bam-paths (",l1,") and -names (",l2,") in makeBamFileList!!!\n" )
+	if ( l1 != l2 ) stop( "Unequal amount of bam-paths (", l1, ") and -names (", l2, ") in makeBamFileList!!!\n" )
 	for ( i in 1:length(paths) ){
 		path <- paths[i]
 		name <- names[i]
@@ -82,7 +82,7 @@
 					
 					fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) )
 					if ( prev.call != 0 ){
-						cat( MAIN_NAME, " ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")\n", sep="" )
+						catMsg( c(" ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")" ) )
 					}
 					prev.call <- curr.call
 					prev.segm <- curr.segm
@@ -107,7 +107,7 @@
 	if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' )
 	if ( !is.list(regionsList) ) stop( 'Input not a list...?' )
 	if ( length(regionsList) < 1 ) stop( 'List seems empty...?' )
-	if ( file.exists( outputDir ) ) cat( MAIN_NAME, " Using dir ", outputDir, " for output\n", sep="")
+	if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") )
 	else dir.create( outputDir )
 	outFiles <- list()
 
@@ -118,10 +118,10 @@
 	sampleNames <- names( regionsList )
 	bedgraphColumns <- c( 'chr', 'start', 'end', 'segmentval' )
 	
-	cat( MAIN_NAME, " There are ", sampleCount, " samples found in input list...\n", sep="")
+	catMsg( c( " There are ", sampleCount, " samples found in input list") )
 
 	for ( sample in sampleNames ){		
-		cat( MAIN_NAME, " Working on sample ", sample, "\n", sep="")
+		catMsg( c(" Working on sample ", sample ) )
 		regionCount <- nrow( regionsList[[sample]] )
 		
 		outSampleBase   <- paste( outputBasename, '_', sample, '_QDNAseqRegions', sep='')
@@ -148,31 +148,11 @@
 	outFiles
 }
 
-#printIgvFile <- function( dat, filename ){
-#	
-#	if ( 'calls' %in% assayDataElementNames(dat) ) {
-#	 	#output <- paste(output, '-called.igv', sep="")
-#	  	cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename)
-#	  	df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), calls(dat), check.names=FALSE, stringsAsFactors=FALSE)
-#	}else {
-#	  #output <- paste(output, '-normalized.igv', sep="")
-#	  cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename)
-#	  df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), round(copynumber(dat), digits=2), check.names=FALSE, stringsAsFactors=FALSE)
-#	}
-#
-#	df$chromosome[df$chromosome == '23'] <- 'X'
-#	df$chromosome[df$chromosome == '24'] <- 'Y'
-#	df$chromosome[df$chromosome == '25'] <- 'MT'
-#	#return( df )
-#	write.table( df, file=filename, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE )	
-#}
-
-
 ## ==================================================
 ## Start of analysis
 ## ==================================================
+MAIN_NAME <- '[INFO] '
 TOOL_PATH <- getScriptPath()
-MAIN_NAME <- '[INFO] '
 CSS_FILE  <- paste( TOOL_PATH, '/QDNAseq.css', sep="" )
 DECIMALS  <- 3
 WEB_LINK  <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html'
@@ -182,21 +162,22 @@
 suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
 suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
 
-systemUser <- system("whoami",T)
-qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
-catMsg( c("Analysis run with user: ", systemUser ) )
-catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) )
-
 ## only one param: the tmp config file
 cmdLineArgs <- commandArgs(TRUE)
 config      <- cmdLineArgs[1]
 
 ## sourcing the config file will load all input params
+## many variables are created in sourced "config"
 source( config )
 
-## if call output requested, set doCall such that we will segment and call
+PLOT_RES  <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 # desparate try to make png text scale well, damn you R...!
 if ( doOutputCallsRds == TRUE ){ doCall <- TRUE }
 
+systemUser <- system("whoami",T)
+qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
+catMsg( c("Analysis run with user: ", systemUser ) )
+catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) )
+
 ## get the comma separated list of chromosomes to exclude
 excludeChrs <- unlist( strsplit( excludeChrsString, ",") )
 
@@ -210,16 +191,12 @@
 ## ------------------------
 
 ## setup bam filelist for easy retrieval later
-#catMsg( "Setting up input bam list" )
-#cat( bamsPaths, "\n" )
-#catMsg( "Namews" )
-#cat( bamsNames, "\n" )
 fileList    <- makeBamFileList( bamsPaths, bamsNames )
 bamCount    <- length( fileList[[ 'all_paths' ]] )
 
-## help msg still needs work!
+## help msg for running script separate still needs work!
 #if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ 
-#	cat( paste( MAIN_NAME, "Usage: ", params_help, sep=''), "\n" )
+#	catMsg( c( "Usage: ", paste(params_help,sep=",") ) )
 #	quit( save = 'no', status=0, runLast=F )
 #}
 
@@ -242,15 +219,20 @@
 regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='')
 igvCalledName  <- paste( binSize, 'kbp_QDNAseq-calls.igv', sep='')
 igvCopyNrName  <- paste( binSize, 'kbp_QDNAseq-normalized.igv', sep='')
+noiseImgName   <- paste( 'QDNAseqNoisePlot.png',  sep='')
+freqImgName    <- paste( 'QDNAseqFrequencyPlot.png',  sep='')
 
-gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep="")
-htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep="")
-robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep="")
-robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep="")
-robjCalledPath <- paste( outputPath, '/', robjCalledName, sep="")
-robjRegionPath <- paste( outputPath, '/', regiOutputName, sep="")
-igvCalledPath  <- paste( outputPath, '/', igvCalledName, sep="")
-igvCopyNrPath  <- paste( outputPath, '/', igvCopyNrName, sep="")
+
+gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='')
+htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='')
+robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='')
+robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='')
+robjCalledPath <- paste( outputPath, '/', robjCalledName, sep='')
+robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='')
+igvCalledPath  <- paste( outputPath, '/', igvCalledName, sep='')
+igvCopyNrPath  <- paste( outputPath, '/', igvCopyNrName, sep='')
+noiseImgPath   <- paste( outputPath, '/', noiseImgName, sep='' )
+freqImgPath    <- paste( outputPath, '/', freqImgName, sep='' )
 
 
 ## ------------------------
@@ -282,9 +264,9 @@
 sampleNames           <- readCountsFiltered@phenoData@data$name
 
 ## save objects to output dir
-saveRDS( readCounts, robjReadCoPath );
+saveRDS( readCountsFiltered, robjReadCoPath );
 saveRDS( copyNumbersSmooth, robjCopyNrPath );
-exportBins(copyNumbersSmooth, file=igvCopyNrPath, format="igv")
+exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" )
 
 ## also save objects for galaxy history output if requested
 if ( doOutputReadcountsRds ){
@@ -305,6 +287,7 @@
 		saveRDS( copyNumbersCalled, calledSegmentsDatasetFile );
 	}
 	exportBins( copyNumbersCalled, file=igvCalledPath, format="igv")
+	
 }
 
 
@@ -315,12 +298,17 @@
 plotted_images <- list() # to keep track of images for later linking
 regions <- list() # will contain the (called) segments
 
-noise_img_file <- paste( binSize, 'kbp_QDNAseqNoisePlot.png',  sep='')
-noise_img_file_path <- paste( outputPath, '/', noise_img_file, sep='' )
-png( noise_img_file_path, width=480, height=480 ); 
-	noisePlot( readCountsFiltered )
+png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES ); 
+	noisePlot( readCountsFiltered, col="darkgreen" )
 dev.off()
 
+if ( doCall ){
+	png( freqImgPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); 
+		frequencyPlot( copyNumbersCalled )
+	dev.off()	
+}
+
+
 for (i in 1:length(sampleNames) ){
 #for (sample in sampleNames(copyNumbersSmooth) ){
 	sample <- sampleNames[i]
@@ -330,22 +318,24 @@
 	type <- 'CopyNumbers'
 	img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png',  sep='')
 	img_file_path <- paste( outputPath, '/', img_file, sep='' )
-	png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT ); plot( copyNumbersSmooth[ ,sample ] ); dev.off()
+	png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); 
+		plot( copyNumbersSmooth[ ,sample ] ); 
+	dev.off()
 	plotted_images[[ sample ]][[ type ]] <- img_file
 
 	if ( doCall ){
 		type <- 'Called'
 		img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png',  sep='')
 		img_file_path <- paste( outputPath, '/', img_file, sep='' )
-		png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT ); 
+		png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); 
 			plot( copyNumbersCalled[ ,sample ] ); 
 		dev.off()
 		plotted_images[[ sample ]][[ type ]] <- img_file
 
-		cat( MAIN_NAME, " Fusing regions of sample: ", sample, "\n", sep="")
+		catMsg( c(" Fusing regions of sample: ", sample) )
 		regions[[ sample ]] <- fuse.regions_test( cgh[, sample] )
 		region_count <- nrow( data.frame( regions[[ sample ]] ) )
-		cat( MAIN_NAME, ' sample "', sample, '" has ', region_count, " regions\n", sep="" )
+		catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) )
 		plotted_images[[ sample ]][[ 'region_count' ]] <- region_count
 	}
 
@@ -361,7 +351,7 @@
 ## ------------------------
 ## prepare output
 ## ------------------------
-cat( MAIN_NAME, "...zipping output\n")
+catMsg( "...zipping output")
 zip_cmd <- paste( "zip -j", gzipOutputPath, paste(outputPath,'/*',sep='') ) ## -j is for removing dirs from the tree
 system( zip_cmd )
 
@@ -385,7 +375,6 @@
 		cat( "<html>\n")
 		cat( "<head>\n")
 			#cat( '<link rel="stylesheet" type="text/css" href="test.css" media="screen" />', "\n" )
-			#cat( '<link rel="stylesheet" type="text/css" href="../../../../static/style/test.css" media="screen" />', 
 			cat( "\t", '<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.4.2/pure-min.css">', "\n" )
 			cat( "\t<style>\n")
 				## have to include CSS into html file, because css referencing outside own dir doesn't seem to work...
@@ -401,9 +390,7 @@
 		cat( "<h1>QDNAseq Report</h1>", "\n")
 		
 		cat( '<h3 class="qdnaseq">About this analysis</h3>', "\n")
-		cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='')
-		#cat( '<a href="#" class="button-success button-xsmall pure-button">test</a>' )
-		
+		cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='')		
 		
 		## ------------------------
 		## table with general info
@@ -413,8 +400,6 @@
 			cat( htmlTableRow( c( "AnalysisName", outputName ) ) )
 			cat( htmlTableRow( c( "AnalysisDate", 'todo' ) ) )
 			cat( htmlTableRow( c( "BinSize (kb)", binSize ) ) )
-			#cat( htmlTableRow( c( "undoSD", undoSD ) ) )
-			#cat( htmlTableRow( c( "useBlacklist", filterBlacklistedBins ) ) )
 			
 			for ( galaxyName in fileList[[ 'all_files' ]] ){
 				sampleName <- fileList[[ galaxyName ]]
@@ -431,7 +416,6 @@
 
 		cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n")
 		cat( '<dl>', "\n" )
-            #cat( '<dt>Definition term</dt>', "\n", '<dd>Definition term</dd>', "\n" )
             cat( '<dt>', htmlLink( path=robjReadCoName, robjReadCoName ), '</dt>', "\n" ) 
             cat( '<dd>QDNAseq object with read counts per bin, ', readCoSize,'</dd>', "\n" )
             r_code <- paste( r_code, '<code class="code">readCounts <- readRDS(', robjReadCoName, ")</code><br />", sep="")
@@ -458,6 +442,7 @@
             }
         cat( '</dl></p>', "\n" )
 
+        ## r_code shows example code on how to load output files
         #cat( r_code, "</p>\n", sep="\n")
         cat( '<p>See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='' )
 
@@ -506,36 +491,48 @@
 			}
 		cat( "</tbody></table></p>", "\n")
 
-		## add noise plot
-		html_noise_img <- htmlLink( path=noise_img_file, paste('<img id="', noise_img_file,'" src="',noise_img_file,'" alt="NoisePlot">', sep='') )
-		plots_html <- paste( plots_html, html_noise_img, "\n<hr \\>\n", sep='' )
+		## ------------------------
+		## create the noise and frequency plots
+		## ------------------------
+		html_noise_img <- htmlLink( 
+			path=noiseImgName, 
+			paste('<img id="', noiseImgName,'" src="', noiseImgName,'" alt="NoisePlot">', sep='') 
+		)
+		html_freq_img  <- htmlLink( 
+			path=freqImgName, 
+			paste('<img id="', freqImgName,'" src="', freqImgName,'" alt="FrequenceyPlot">', sep='') 
+		)
+		extra_plots_html <- paste( 
+			html_noise_img, "\n<br \\>\n", 
+			html_freq_img, "\n", sep='' 
+		)
 		
 		## ------------------------
 		## section with various output shown
 		## ------------------------
-		cat( '<h3 class="qdnaseq">Results: plots</h3><p>', "\n")
+		cat( '<h3 class="qdnaseq">Results: Analysis plots</h3><p>', "\n")
+		cat( extra_plots_html, "\n")
+		cat( '<h3 class="qdnaseq">Results: Sample plots</h3><p>', "\n")
 		cat( plots_html, "\n")
 		cat( "\n</p></body>\n")
 		cat( "\n</html>\n")
 sink()
 
 ## ------------------------
-## creating html output to be viewed in middle galaxy pane
+## creating main html output for galaxy history
 ## ------------------------
-#sink( file = htmlOutputPath, type = "output" )
 sink( file = outputHtml, type = "output" )
 		
 		cat( "<head>", "\n")
 			cat( "\t", '<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.4.2/pure-min.css">', "\n" )
 
 			cat( "<style>", "\n")
-				## have to include CSS into html file, because css referencing outside own dir doesn't seem to work...makes it more portable anyway :P
+				## include CSS directly into html file
 				cat( paste( "\t", '/* the css here originates from ', CSS_FILE,' */', "\n") )
 				cat( paste( "\t", readLines( CSS_FILE, n = -1)), sep="\n" )
 			cat( "</style>", "\n")
 		cat( "</head>", "\n")
 
-
 		cat( '<h1>QDNAseq Results (', outputName,')</h1>', "\n", sep="")
 		cat( '<p>Explore <a href="', htmlOutputName, '" class="button-success button-small pure-button">the results</a> directly within galaxy</p>', "\n", sep="")
 		cat( '<p>Or download a <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" )
@@ -544,9 +541,10 @@
 		
 sink()
 
-
-cat( MAIN_NAME, "...zipping output\n")
-zip_cmd <- paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) ## -j is for removing dirs from the tree
-system( zip_cmd )
-cat( MAIN_NAME, "done...\n", sep="")
+## ------------------------
+## create final zip and quit with status 0 to tell galaxy all was fine
+## ------------------------
+catMsg( "zipping all output")
+system( paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) )
+catMsg( "done" )
 q(status=0)