Mercurial > repos > stef > qdnaseq
diff QDNAseq.R @ 59:bfe9d9b7e261 draft
Uploaded
author | stef |
---|---|
date | Thu, 07 Aug 2014 11:02:25 -0400 |
parents | c962b56a2dd8 |
children | 2f0af8970aea |
line wrap: on
line diff
--- a/QDNAseq.R Thu Aug 07 11:02:15 2014 -0400 +++ b/QDNAseq.R Thu Aug 07 11:02:25 2014 -0400 @@ -54,27 +54,27 @@ ## -------------------- ## copied code for extracting the regions by segment call status ## -------------------- -fuse.regions_test <- function(cgh, onlyCalled=T) { - if (ncol(cgh) > 1) stop('Please specify which sample...') +fuseRegions <- function( cgh, onlyCalled=T ) { + if ( ncol(cgh) > 1 ) stop('Please specify which sample...') - x <- data.frame(cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE) - colnames( x ) <- c( "chr", "start", "end", "call", "log2", "segmentval" ) + data <- data.frame( cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE) + colnames( data ) <- c( "chr", "start", "end", "call", "log2", "segmentval" ) fused.data <- data.frame() curr.bin <- 1 - for (chr in unique(x$chr)) { + for ( chr in unique( data$chr ) ) { - chr.data <- x[x$chr == chr,] - prev.bin <- curr.bin - prev.call <- chr.data[1, 'call'] - prev.log2 <- chr.data[1, 'log2'] - prev.segm <- chr.data[1, 'segmentval'] - start <- chr.data[1, 'start'] + chr.data <- data[ data$chr == chr, ] + prev.bin <- curr.bin + prev.call <- chr.data[ 1, 'call' ] + prev.log2 <- chr.data[ 1, 'log2' ] + prev.segm <- chr.data[ 1, 'segmentval' ] + start <- chr.data[ 1, 'start' ] if ( nrow(chr.data) > 1) { for ( i in 2:nrow(chr.data) ) { - curr.bin <- curr.bin + 1 + curr.bin <- curr.bin + 1 curr.call <- chr.data[ i, 'call'] curr.segm <- chr.data[ i, 'segmentval'] @@ -86,15 +86,16 @@ } prev.call <- curr.call prev.segm <- curr.segm - prev.bin <- curr.bin - start <- chr.data[ i, 'start'] + prev.bin <- curr.bin + start <- chr.data[ i, 'start'] } } fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) - }else { + }else{ fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) } } + ## if requested remove the regions with call status 0 (= normal) if ( onlyCalled == T ){ fused.data <- fused.data[ fused.data$call != 0, ] } @@ -125,25 +126,16 @@ regionCount <- nrow( regionsList[[sample]] ) outSampleBase <- paste( outputBasename, '_', sample, '_QDNAseqRegions', sep='') - outBedFile <- paste( outSampleBase, '.bed', sep="" ) - outBedPath <- paste( outputDir, '/', outBedFile, sep="" ) outBedgraphFile <- paste( outSampleBase, '.bedGraph', sep="" ) outBedgraphPath <- paste( outputDir, '/', outBedgraphFile, sep="" ) - ## ---------- BED ---------- - txt <- "#" - sink( outBedPath ) - cat( txt ) - sink() - write.table( regionsList[[sample]], outBedPath, quote=F, sep="\t", row.names=F, append=T) - ## ---------- BEDGRAPH ---------- txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample,"_segmReg description=segmented_regions_from_QDNAseq\n", sep="") sink( outBedgraphPath ) cat( txt ) sink() write.table( regionsList[[sample]][,bedgraphColumns], outBedgraphPath, quote=F, sep="\t", row.names=F, append=T, col.names=F) - outFiles[[sample]] <- c( outBedFile, outBedgraphFile ) + outFiles[[sample]] <- c( outBedgraphFile ) } outFiles } @@ -156,6 +148,7 @@ CSS_FILE <- paste( TOOL_PATH, '/QDNAseq.css', sep="" ) DECIMALS <- 3 WEB_LINK <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html' +PURE_CSS <- 'http://yui.yahooapis.com/pure/0.5.0/pure-min.css' PAR_SET <- list( pch=22 ) catMsg( "Starting QDNAseq wrapper" ) @@ -168,7 +161,7 @@ config <- cmdLineArgs[1] ## sourcing the config file will load all input params -## many variables are created in sourced "config" +## many variables are imported via sourced "config" source( config ) PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 # desparate try to make png text scale well, damn you R...! @@ -176,9 +169,10 @@ systemUser <- system("whoami",T) qdnaseqVersion <- packageDescription( "QDNAseq" )$Version +rVersion <- R.version.string catMsg( c("Analysis run with user: ", systemUser ) ) catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) ) -catMsg( c( sessionInfo()$R.version$version.string ) ) +catMsg( c( rVersion ) ) ## get the comma separated list of chromosomes to exclude excludeChrs <- unlist( strsplit( excludeChrsString, ",") ) @@ -192,11 +186,7 @@ ## /DEBUG ## ------------------------ -## setup bam filelist for easy retrieval later -fileList <- makeBamFileList( bamsPaths, bamsNames ) -bamCount <- length( fileList[[ 'all_paths' ]] ) - -## help msg for running script separate still needs work! +## help msg for running script without wrapper still needs work! #if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ # catMsg( c( "Usage: ", paste(params_help,sep=",") ) ) # quit( save = 'no', status=0, runLast=F ) @@ -214,36 +204,40 @@ ## construct output file-names and -paths ## ------------------------ htmlOutputName <- 'index.html' -gzipOutputName <- paste( 'QDNAseqResults_', outputName, '.zip', sep='' ) +gzipOutputName <- paste( 'QDNAseqResults_', binSize, 'kbp_', outputName, '.zip', sep='' ) robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='') robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='') robjCalledName <- paste( binSize, 'kbp_QDNAseqCalledSegments.rds', sep='') regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') -igvCalledName <- paste( binSize, 'kbp_QDNAseq-calls.igv', sep='') igvCopyNrName <- paste( binSize, 'kbp_QDNAseq-normalized.igv', sep='') noiseImgName <- paste( 'QDNAseqNoisePlot.png', sep='') freqImgName <- paste( 'QDNAseqFrequencyPlot.png', sep='') - gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='') htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='') robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') robjCalledPath <- paste( outputPath, '/', robjCalledName, sep='') robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') -igvCalledPath <- paste( outputPath, '/', igvCalledName, sep='') igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='' ) freqImgPath <- paste( outputPath, '/', freqImgName, sep='' ) +## setup bam filelist for easy retrieval later +fileList <- makeBamFileList( bamsPaths, bamsNames ) +bamCount <- length( fileList[[ 'all_paths' ]] ) ## ------------------------ ## performing QDNAseq analysis steps ## ------------------------ -if ( debug ){ - ## in case of debug just use inbuilt LGG data for speedup +if ( debug ){ ## in case of debug just use inbuilt LGG data for speedup data(LGG150) readCounts <- LGG150 + binSize <- 15 + bamsPaths <- c( "BUILD_IN_DATA_LGG150") + bamsNames <- c( "LGG150") + fileList <- makeBamFileList( bamsPaths, bamsNames ) + bamCount <- length( fileList[[ 'all_paths' ]] ) }else{ if ( nchar(binAnnotations) == 0 ){ binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType ) @@ -277,6 +271,9 @@ if ( doOutputCopynumbersRds ){ saveRDS( copyNumbersSmooth, copyNumbersDatasetFile ); } +if ( doOutputCopynumbersIgv ){ + saveRDS( copyNumbersSmooth, copyNumbersIgvDatasetFile ); +} ## proceed with calling if requested if ( doCall ){ @@ -288,8 +285,8 @@ if ( doOutputCallsRds ){ saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); } - exportBins( copyNumbersCalled, file=igvCalledPath, format="igv") - + ## actual calls are not included in exportBins output, so not used now + #exportBins( copyNumbersCalled, file=igvCalledPath, format="igv") } @@ -312,9 +309,7 @@ dev.off() } - for (i in 1:length(sampleNames) ){ -#for (sample in sampleNames(copyNumbersSmooth) ){ sample <- sampleNames[i] usedReads <- readCountsFiltered@phenoData@data$used.reads[i] catMsg( c("Creating plots for sample: ", sample ) ) @@ -339,7 +334,7 @@ plotted_images[[ sample ]][[ type ]] <- img_file catMsg( c(" Fusing regions of sample: ", sample) ) - regions[[ sample ]] <- fuse.regions_test( cgh[, sample] ) + regions[[ sample ]] <- fuseRegions( cgh[, sample] ) region_count <- nrow( data.frame( regions[[ sample ]] ) ) catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) plotted_images[[ sample ]][[ 'region_count' ]] <- region_count @@ -364,13 +359,12 @@ ## ------------------------ ## get filesizes for report ## ------------------------ -zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1000000, digits=2 ), 'MB' ) -readCoSize <- paste( round( file.info( robjReadCoPath )[["size"]] / 1000000, digits=2 ), 'MB' ) -copyNrSize <- paste( round( file.info( robjCopyNrPath )[["size"]] / 1000000, digits=2 ), 'MB' ) -calledSize <- paste( round( file.info( robjCalledPath )[["size"]] / 1000000, digits=2 ), 'MB' ) -regionSize <- paste( round( file.info( robjRegionPath )[["size"]] / 1000000, digits=2 ), 'MB' ) -igvCopyNrSize <- paste( round( file.info( igvCopyNrPath )[["size"]] / 1000000, digits=2 ), 'MB' ) -igvCalledSize <- paste( round( file.info( igvCalledPath )[["size"]] / 1000000, digits=2 ), 'MB' ) +zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +readCoSize <- paste( round( file.info( robjReadCoPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +copyNrSize <- paste( round( file.info( robjCopyNrPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +calledSize <- paste( round( file.info( robjCalledPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +regionSize <- paste( round( file.info( robjRegionPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +igvCopyNrSize <- paste( round( file.info( igvCopyNrPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) ## ------------------------ ## creating html output to be linked to from the middle galaxy pane @@ -381,14 +375,14 @@ cat( "<html>\n") cat( "<head>\n") #cat( '<link rel="stylesheet" type="text/css" href="test.css" media="screen" />', "\n" ) - cat( "\t", '<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.4.2/pure-min.css">', "\n" ) - cat( "\t<style>\n") - ## have to include CSS into html file, because css referencing outside own dir doesn't seem to work... - cat( paste( "\t\t", '/* the css here originates from ', CSS_FILE,' */', "\n") ) - cat( paste( "\t\t", readLines( CSS_FILE, n = -1)), sep="\n" ) + cat( "\t", '<title>QDNAseq Report | ', outputName,'</title>', "\n", sep='' ) + cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' ) + cat( "\t<style>\n", sep='') + ## include CSS into html file, makes it more portable + cat( paste( "\t\t", '/* the css here originates from ', CSS_FILE,' */', "\n"), sep='' ) + cat( "\t\t", readLines( CSS_FILE ), sep="\n\t\t" ) #cat( "\t\th1 {color:red;}", "\n") - - cat( "\t</style>\n" ) + cat( "\n\t</style>\n" ) cat( "\n</head>\n") cat( "\n<body>\n") @@ -396,39 +390,41 @@ cat( "<h1>QDNAseq Report</h1>", "\n") cat( '<h3 class="qdnaseq">About this analysis</h3>', "\n") - cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='') + cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='') ## ------------------------ ## table with general info ## ------------------------ cat( '<h3 class="qdnaseq">Settings</h3><p>', "\n") - cat( '<table class="pure-table pure-table-striped"><thead><tr><th>setting</th><th>value</th></tr></thead><tbody>' ) + cat( '<table class="pure-table pure-table-striped"><tbody>' ) cat( htmlTableRow( c( "AnalysisName", outputName ) ) ) - cat( htmlTableRow( c( "AnalysisDate", 'todo' ) ) ) + cat( htmlTableRow( c( "AnalysisDate", as.character(Sys.time()) ) ) ) cat( htmlTableRow( c( "BinSize (kb)", binSize ) ) ) + cat( htmlTableRow( c( "R info", rVersion ) ) ) + cat( htmlTableRow( c( "QDNAseq info", qdnaseqVersion ) ) ) + sampleStrings <- c() for ( galaxyName in fileList[[ 'all_files' ]] ){ sampleName <- fileList[[ galaxyName ]] - cat( htmlTableRow( c( "InputBam", paste( galaxyName, ' (', sampleName, ')', sep='' ) ) ) ) + sampleStrings <- c( sampleStrings, paste( galaxyName, ' (', sampleName, ')', sep='' ) ) } + cat( htmlTableRow( c( "InputBams", paste( sampleStrings, collapse=", ") ) ) ) + cat( "</tbody></table></p>", "\n") ## ------------------------ ## list with links to all output files ## ------------------------ - r_code <- '<p>' - r_code <- paste( r_code, '<code class="code">## R code to load files</code><br />', sep="\n" ) - r_code <- paste( r_code, '<code class="code">library( QDNAseq )</code><br />', sep="\n") + cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n") cat( '<dl>', "\n" ) cat( '<dt>', htmlLink( path=robjReadCoName, robjReadCoName ), '</dt>', "\n" ) cat( '<dd>QDNAseq object with read counts per bin, ', readCoSize,'</dd>', "\n" ) - r_code <- paste( r_code, '<code class="code">readCounts <- readRDS(', robjReadCoName, ")</code><br />", sep="") + cat( '<dt>', htmlLink( path=robjCopyNrName, robjCopyNrName ), '</dt>', "\n" ) cat( '<dd>QDNAseq object with copy numbers per bin, ', copyNrSize,'</dd>', "\n" ) - r_code <- paste( r_code, '<code class="code">copyNumbersSmooth <- readRDS(', robjCopyNrName, ")</code><br />", sep="") cat( '<dt>', htmlLink( path=igvCopyNrName, igvCopyNrName ), '</dt>', "\n" ) cat( '<dd>IGV formatted text file with copy numbers per bin, ', igvCopyNrSize,'</dd>', "\n" ) @@ -436,21 +432,20 @@ if ( doCall ){ cat( '<dt>', htmlLink( path=robjCalledName, robjCalledName ), '</dt>', "\n" ) cat( '<dd>QDNAseq object with segment and call status per bin, ', calledSize,'</dd>', "\n" ) - r_code <- paste( r_code, '<code class="code">copyNumbersCalled <- readRDS(', robjCalledName, ")</code><br />", sep="") cat( '<dt>', htmlLink( path=regiOutputName, regiOutputName ), '</dt>', "\n" ) cat( '<dd>list with segmented/called regions for each sample, ', regionSize, '</dd>', "\n" ) - r_code <- paste( r_code, '<code class="code">calledRegions <- readRDS(', regiOutputName, ")</code><br />", sep="") - - cat( '<dt>', htmlLink( path=igvCalledName, igvCalledName ), '</dt>', "\n" ) - cat( '<dd>IGV formatted text file with calls per bin , ', igvCalledSize,'</dd>', "\n" ) - } cat( '</dl></p>', "\n" ) + r_code <- '## start own downstream analysis in R with:' + r_code <- c( r_code, 'library( QDNAseq )' ) + r_code <- c( r_code, paste( 'readRDS( ', robjReadCoName, ' ) -> readCounts', sep="") ) + ## r_code shows example code on how to load output files #cat( r_code, "</p>\n", sep="\n") cat( '<p>See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='' ) + cat( '<pre>', paste( r_code, collapse="\n" ), '</pre>', "\n", sep='' ) ## ------------------------ ## table with links to files @@ -475,7 +470,6 @@ html_copy_img <- htmlLink( path=copy_img, paste('<img id="', copy_img,'" src="',copy_img,'" alt="',bam_file, '" width="', width, '" height="', height, '">', sep='') ) html_call_thumb <- 'NA' html_call_img <- '' - html_bed <- 'NA' html_bedGraph <- 'NA' region_count <- 'NA' @@ -484,14 +478,13 @@ region_count <- plotted_images[[ bam_file ]][[ 'region_count' ]] html_call_thumb <- htmlLink( path=paste('#', call_img, sep=''), paste('<img src="', call_img, '" alt="', bam_file, '" width="', width_t,'" height="', height_t,'">', sep='') ) - files <- printedFiles[[ bam_file ]] - html_bed <- htmlLink( path=files[1], 'bed' ) - html_bedGraph <- htmlLink( path=files[2], 'bedGraph' ) + ## list structure with output files just is an array per BAM + html_bedGraph <- htmlLink( path=printedFiles[[ bam_file ]][1], 'bedGraph' ) html_call_img <- htmlLink( path=call_img, paste('<img id="', call_img,'" src="', call_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') ) } ## add info to overview table, including small thumbnails - cat( htmlTableRow( c(bam_file, html_copy_thumb, html_call_thumb, usedReads, region_count, paste( html_bed, html_bedGraph, sep=", ")) ), "\n" ) + cat( htmlTableRow( c(bam_file, html_copy_thumb, html_call_thumb, usedReads, region_count, paste( html_bedGraph, sep=", ")) ), "\n" ) ## now include (large) images in html page plots_html <- paste( plots_html, html_copy_img, "\n", html_call_img, "\n<hr \\>\n", sep='' ) } @@ -510,11 +503,8 @@ ) ## there only is a frequency plot when data is called if ( !doCall ){ html_freq_img <- '' } - - extra_plots_html <- paste( - html_noise_img, " \n", - html_freq_img, "\n", sep='' - ) + + extra_plots_html <- paste( html_noise_img, " \n", html_freq_img, "\n", sep='' ) ## ------------------------ ## section with various output shown @@ -533,7 +523,7 @@ sink( file = outputHtml, type = "output" ) cat( "<head>", "\n") - cat( "\t", '<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.4.2/pure-min.css">', "\n" ) + cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' ) cat( "<style>", "\n") ## include CSS directly into html file @@ -543,8 +533,8 @@ cat( "</head>", "\n") cat( '<h1>QDNAseq Results (', outputName,')</h1>', "\n", sep="") - cat( '<p>Explore <a href="', htmlOutputName, '" class="button-success button-small pure-button">the results</a> directly within galaxy</p>', "\n", sep="") - cat( '<p>Or download a <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" ) + cat( '<p>Explore <a href="', htmlOutputName, '" class="button">the results</a> directly within galaxy</p>', "\n", sep="") + cat( '<p>Or download a <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" ) cat( '<p>The zip file contains all output files, including *.rds files allowing you to load the R copyNumber object(s) and perform further detailed analysis or create your own output for further processing. You can load the rds file with <code class="code">loadRDS(file.rds)</code></p>', "\n", sep="")