Mercurial > repos > stef > qdnaseq
diff QDNAseq.R @ 64:2f0af8970aea draft
Uploaded
author | stef |
---|---|
date | Wed, 20 Aug 2014 11:16:41 -0400 |
parents | bfe9d9b7e261 |
children | 823791a3d7e1 |
line wrap: on
line diff
--- a/QDNAseq.R Wed Aug 20 11:16:26 2014 -0400 +++ b/QDNAseq.R Wed Aug 20 11:16:41 2014 -0400 @@ -54,63 +54,51 @@ ## -------------------- ## copied code for extracting the regions by segment call status ## -------------------- -fuseRegions <- function( cgh, onlyCalled=T ) { - if ( ncol(cgh) > 1 ) stop('Please specify which sample...') +fuseRegions <- function( obj, minRatio=0 ) { + if ( ncol(obj) > 1 ) stop('Please specify which sample...') - data <- data.frame( cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE) - colnames( data ) <- c( "chr", "start", "end", "call", "log2", "segmentval" ) + data <- data.frame( obj@featureData@data[,1:3], copynumber(obj), segmented(obj), check.names=FALSE, stringsAsFactors=FALSE) + colnames( data ) <- c( "chr", "start", "end", "log2", "segmentval" ) fused.data <- data.frame() curr.bin <- 1 for ( chr in unique( data$chr ) ) { - chr.data <- data[ data$chr == chr, ] prev.bin <- curr.bin - prev.call <- chr.data[ 1, 'call' ] prev.log2 <- chr.data[ 1, 'log2' ] prev.segm <- chr.data[ 1, 'segmentval' ] start <- chr.data[ 1, 'start' ] if ( nrow(chr.data) > 1) { for ( i in 2:nrow(chr.data) ) { - curr.bin <- curr.bin + 1 - curr.call <- chr.data[ i, 'call'] curr.segm <- chr.data[ i, 'segmentval'] if ( curr.segm != prev.segm ) { - - fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) - if ( prev.call != 0 ){ - catMsg( c(" ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")" ) ) - } - prev.call <- curr.call + fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) ) prev.segm <- curr.segm prev.bin <- curr.bin start <- chr.data[ i, 'start'] } } - fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) + fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) ) }else{ - fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) + fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) ) } } - ## if requested remove the regions with call status 0 (= normal) - if ( onlyCalled == T ){ - fused.data <- fused.data[ fused.data$call != 0, ] - } + ## remove regions with low amplitude + fused.data <- fused.data[ abs(fused.data$segmentval) >= minRatio, ] fused.data } ## DESC: takes the output of fuse.regions and outputs a txt file per sample -outputRegionsFromList <- function ( regionsList, outputBasename, outputDir="./" ){ +outputRegionsFromList <- function ( regionsList, outputBasename, outputDir="./", binSize, storeList ){ if ( missing(regionsList) ) stop( 'Please provide regionsList...' ) if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' ) if ( !is.list(regionsList) ) stop( 'Input not a list...?' ) if ( length(regionsList) < 1 ) stop( 'List seems empty...?' ) if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") ) else dir.create( outputDir ) - outFiles <- list() ## have to set R output options otherwise scientific method is used at some point options( "scipen"=100 ) @@ -125,21 +113,23 @@ catMsg( c(" Working on sample ", sample ) ) regionCount <- nrow( regionsList[[sample]] ) - outSampleBase <- paste( outputBasename, '_', sample, '_QDNAseqRegions', sep='') + outSampleBase <- paste( outputBasename, '_', sample, '_', binSize, 'kbp', sep='') outBedgraphFile <- paste( outSampleBase, '.bedGraph', sep="" ) outBedgraphPath <- paste( outputDir, '/', outBedgraphFile, sep="" ) ## ---------- BEDGRAPH ---------- - txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample,"_segmReg description=segmented_regions_from_QDNAseq\n", sep="") + txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample," description=segmented_regions_from_QDNAseq_",binSize,"kbp\n", sep="") sink( outBedgraphPath ) cat( txt ) sink() write.table( regionsList[[sample]][,bedgraphColumns], outBedgraphPath, quote=F, sep="\t", row.names=F, append=T, col.names=F) - outFiles[[sample]] <- c( outBedgraphFile ) + #outFiles[[sample]] <- c( outBedgraphFile ) + storeList[[ paste( binSize, sample, 'bedgraph', sep="_")]] <- outBedgraphFile } - outFiles + return(storeList) } + ## ================================================== ## Start of analysis ## ================================================== @@ -149,7 +139,6 @@ DECIMALS <- 3 WEB_LINK <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html' PURE_CSS <- 'http://yui.yahooapis.com/pure/0.5.0/pure-min.css' -PAR_SET <- list( pch=22 ) catMsg( "Starting QDNAseq wrapper" ) catMsg( "Loading R libraries" ) @@ -164,190 +153,197 @@ ## many variables are imported via sourced "config" source( config ) -PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 # desparate try to make png text scale well, damn you R...! -if ( doOutputCallsRds == TRUE ){ doCall <- TRUE } +## desparate tries to make png text scale well, damn you R...! +PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 +#PLOT_PS <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 40 +#TXT_FACTOR <- PLOT_WIDTH/480 +#PAR_SET <- list( pch=22, cex.lab=TXT_FACTOR, cex.main=TXT_FACTOR ) +PAR_SET <- list( pch=22 ) systemUser <- system("whoami",T) qdnaseqVersion <- packageDescription( "QDNAseq" )$Version rVersion <- R.version.string -catMsg( c("Analysis run with user: ", systemUser ) ) -catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) ) +startTime <- Sys.time() +analysisStart <- as.character( startTime ) +catMsg( c("QDNAseq version: ", qdnaseqVersion) ) catMsg( c( rVersion ) ) ## get the comma separated list of chromosomes to exclude excludeChrs <- unlist( strsplit( excludeChrsString, ",") ) +binSizes <- as.numeric( unlist( strsplit( binSizesString, ",") ) ) + ## ------------------------ ## DEBUG -#catMsg( "PARAM" ) -#catMsg( galaxy_path ) -#catMsg( repos_path ) -#catMsg( instal_path ) +if ( debug ){ + catMsg( c("Analysis run under user: ", systemUser ) ) + catMsg( c("Here comes sessionInfo: " ) ) + print( sessionInfo() ) +} ## /DEBUG ## ------------------------ -## help msg for running script without wrapper still needs work! -#if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ -# catMsg( c( "Usage: ", paste(params_help,sep=",") ) ) -# quit( save = 'no', status=0, runLast=F ) -#} - +## prepare output dir if ( !file.exists( outputPath) ){ dir.create( outputPath ) } -## because we circumvent params that galaxy can save, we want to ## copy source config file to output dir to include it in output zip -file.copy( config, paste(outputPath, 'qdnaseq_config_file.R', sep='/') ) - -## ------------------------ -## construct output file-names and -paths -## ------------------------ -htmlOutputName <- 'index.html' -gzipOutputName <- paste( 'QDNAseqResults_', binSize, 'kbp_', outputName, '.zip', sep='' ) -robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='') -robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='') -robjCalledName <- paste( binSize, 'kbp_QDNAseqCalledSegments.rds', sep='') -regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') -igvCopyNrName <- paste( binSize, 'kbp_QDNAseq-normalized.igv', sep='') -noiseImgName <- paste( 'QDNAseqNoisePlot.png', sep='') -freqImgName <- paste( 'QDNAseqFrequencyPlot.png', sep='') - -gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='') -htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='') -robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') -robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') -robjCalledPath <- paste( outputPath, '/', robjCalledName, sep='') -robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') -igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') -noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='' ) -freqImgPath <- paste( outputPath, '/', freqImgName, sep='' ) +file.copy( config, paste(outputPath, 'galaxyConfigFile.R', sep='/') ) ## setup bam filelist for easy retrieval later fileList <- makeBamFileList( bamsPaths, bamsNames ) bamCount <- length( fileList[[ 'all_paths' ]] ) -## ------------------------ -## performing QDNAseq analysis steps +gzipOutputName <- paste( 'QDNAseqResults_', outputName, '.zip', sep='' ) +gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='') +htmlOutputName <- 'index.html' +htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='') + +plotted_images <- list() # to keep track of images for later linking +regions <- list() # will contain the segments +outputFiles <- list() + ## ------------------------ -if ( debug ){ ## in case of debug just use inbuilt LGG data for speedup - data(LGG150) - readCounts <- LGG150 - binSize <- 15 - bamsPaths <- c( "BUILD_IN_DATA_LGG150") +## in case of debug just use inbuilt LGG data for speedup +if ( debug ){ + binSizes <- c(15) + bamsPaths <- c( "BUILD_IN_DATA") bamsNames <- c( "LGG150") fileList <- makeBamFileList( bamsPaths, bamsNames ) bamCount <- length( fileList[[ 'all_paths' ]] ) -}else{ - if ( nchar(binAnnotations) == 0 ){ - binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType ) - }else{ - ## if user provided file, check if correct class - if ( class(binAnnotations)[1] != 'AnnotatedDataFrame' ){ - stop( "Provided binAnnotations file is not of class 'AnnotatedDataFrame'\n" ) - } - binAnnotations <- readRDS( binAnnotations ) - } - ## provide bamnames because in galaxy everyting is called "dataset_###" - readCounts <- binReadCounts( binAnnotations, bamfiles=fileList[[ 'all_paths' ]], bamnames=bamsNames ) } -readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs ) -readCountsFiltered <- estimateCorrection( readCountsFiltered ) -copyNumbers <- correctBins( readCountsFiltered ) -copyNumbersNormalized <- normalizeBins( copyNumbers ) -copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) -sampleNames <- readCountsFiltered@phenoData@data$name +for ( binSize in binSizes ){ + + ## ------------------------ + ## construct output file-names and -paths + ## ------------------------ + robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='') + robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='') + igvCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='') + robjSegmntName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='') + regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') + noiseImgName <- paste( binSize, 'kbp_QDNAseqNoiseplot.png', sep='') + + robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') + robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') + robjSegmntPath <- paste( outputPath, '/', robjSegmntName, sep='') + robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') + igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') + noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='') + + binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType ) + + ## in case of debug just use inbuilt LGG data for speedup + if ( debug ){ + data(LGG150) + readCounts <- LGG150 + }else{ + ## provide bamnames because in galaxy everyting is called "dataset_###" + readCounts <- binReadCounts( binAnnotations, bamfiles=fileList[[ 'all_paths' ]], bamnames=fileList[[ 'all_names' ]] ) + } -## save objects to output dir -saveRDS( readCountsFiltered, robjReadCoPath ); -saveRDS( copyNumbersSmooth, robjCopyNrPath ); -exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" ) + readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs ) + readCountsFiltered <- estimateCorrection( readCountsFiltered ) + copyNumbers <- correctBins( readCountsFiltered ) + copyNumbersNormalized <- normalizeBins( copyNumbers ) + copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) + sampleNames <- readCountsFiltered@phenoData@data$name + + ## save objects to output dir + saveRDS( readCountsFiltered, robjReadCoPath ); + saveRDS( copyNumbersSmooth, robjCopyNrPath ); + exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" ) + + ## also save objects for galaxy history output if requested + if ( doOutputCopynumbersIgv ){ + #@ a bit hacky galaxy way to allow an unknown number of output files based on param selection + #@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files + historyName <- paste(binSize, 'kbp-IGV', sep="") + igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" ) + exportBins( copyNumbersSmooth, file=igvFile, format="igv" ) + catMsg( c("Exported igv file to history for ", binSize, "kbp bin") ) + } + + ## proceed with calling if requested + if ( doSegment ){ + copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD ) + copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented ) + cgh <- makeCgh( copyNumbersSegmented ) + saveRDS( copyNumbersSegmented, robjSegmntPath ); + } -## also save objects for galaxy history output if requested -if ( doOutputReadcountsRds ){ - saveRDS( readCountsFiltered, readCountsDatasetFile ); -} -if ( doOutputCopynumbersRds ){ - saveRDS( copyNumbersSmooth, copyNumbersDatasetFile ); -} -if ( doOutputCopynumbersIgv ){ - saveRDS( copyNumbersSmooth, copyNumbersIgvDatasetFile ); -} + ## ------------------------ + ## create output files + ## ------------------------ + png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES ); + par( PAR_SET ) + noisePlot( readCountsFiltered, main=paste( "Noise Plot ", binSize, "kbp", sep=''), col="darkgreen" ) + dev.off() + + binSize <- as.character( binSize ) # to avoid R using it as array index... (*#$^@ you R!) + binSizeString <- paste( binSize, 'kbp', sep='') + + for (i in 1:length(sampleNames) ){ + + sample <- sampleNames[i] + usedReads <- readCountsFiltered@phenoData@data$used.reads[i] + catMsg( c("Creating plots for sample: ", sample, " (", binSizeString, ")" ) ) + + type <- 'CopyNumbers' + img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') + img_file_path <- paste( outputPath, '/', img_file, sep='' ) -## proceed with calling if requested -if ( doCall ){ - copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD ) - copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented ) - copyNumbersCalled <- callBins( copyNumbersSegmented ) - cgh <- makeCgh( copyNumbersCalled ) - saveRDS( copyNumbersCalled, robjCalledPath ); - if ( doOutputCallsRds ){ - saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); + ## COPYNUMBER PLOT + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + par( PAR_SET ) + plot( copyNumbersSmooth[ ,sample ], main=paste(sample, ": CopyNumbers", sep="") ); + mtext( paste( "(", binSizeString, " bins)" sep=""), 3 ) + abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + dev.off() + + plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file + + if ( doSegment ){ + type <- 'Segmented' + img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') + img_file_path <- paste( outputPath, '/', img_file, sep='' ) + + ## COPYNUMBER PLOT + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + par( PAR_SET ) + plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers and Segments", sep="") ); + mtext( paste( "(", binSizeString, " bins)" sep=""), 3 ) + abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + dev.off() + + plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file + + catMsg( c(" Fusing regions of sample: ", sample) ) + regions[[ sample ]] <- fuseRegions( cgh[, sample] ) + + region_count <- nrow( data.frame( regions[[ sample ]] ) ) + catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) + plotted_images[[ paste(binSize, sample, 'region_count', sep="_" ) ]] <- region_count + } + + ## add USED read counts + plotted_images[[ paste(binSize, sample, 'usedReads', sep="_" ) ]] <- usedReads } - ## actual calls are not included in exportBins output, so not used now - #exportBins( copyNumbersCalled, file=igvCalledPath, format="igv") -} + if ( doSegment ){ + saveRDS( regions, robjRegionPath ) + plotted_images <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath, binSize=binSize, storeList=plotted_images ) + } +}# end bin -## ------------------------ -## create output files -## ------------------------ -plotted_images <- list() # to keep track of images for later linking -regions <- list() # will contain the (called) segments - -png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - noisePlot( readCountsFiltered, col="darkgreen" ) -dev.off() - -if ( doCall ){ - png( freqImgPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - frequencyPlot( copyNumbersCalled ) - dev.off() -} - -for (i in 1:length(sampleNames) ){ - sample <- sampleNames[i] - usedReads <- readCountsFiltered@phenoData@data$used.reads[i] - catMsg( c("Creating plots for sample: ", sample ) ) +## ----- debug ----- +#catMsg( "done" ) +#q(status=0) +## ---- /debug ----- - type <- 'CopyNumbers' - img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') - img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - plot( copyNumbersSmooth[ ,sample ] ); - dev.off() - plotted_images[[ sample ]][[ type ]] <- img_file - - if ( doCall ){ - type <- 'Called' - img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') - img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - plot( copyNumbersCalled[ ,sample ] ); - dev.off() - plotted_images[[ sample ]][[ type ]] <- img_file - - catMsg( c(" Fusing regions of sample: ", sample) ) - regions[[ sample ]] <- fuseRegions( cgh[, sample] ) - region_count <- nrow( data.frame( regions[[ sample ]] ) ) - catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) - plotted_images[[ sample ]][[ 'region_count' ]] <- region_count - } - - ## add USED read counts - plotted_images[[ sample ]][[ 'usedReads' ]] <- usedReads -} - -if ( doCall ){ - saveRDS( regions, robjRegionPath ) - printedFiles <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath ) -} ## ------------------------ ## prepare output @@ -359,27 +355,22 @@ ## ------------------------ ## get filesizes for report ## ------------------------ -zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -readCoSize <- paste( round( file.info( robjReadCoPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -copyNrSize <- paste( round( file.info( robjCopyNrPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -calledSize <- paste( round( file.info( robjCalledPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -regionSize <- paste( round( file.info( robjRegionPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -igvCopyNrSize <- paste( round( file.info( igvCopyNrPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +endTime <- Sys.time() +timeDiff <- format( round( endTime - startTime, 3 ) ) +analysisEnd <- as.character( endTime ) ## ------------------------ ## creating html output to be linked to from the middle galaxy pane ## ------------------------ - -#sink( file = outputHtml, type = "output" ) sink( file = htmlOutputPath, type = "output" ) cat( "<html>\n") cat( "<head>\n") - #cat( '<link rel="stylesheet" type="text/css" href="test.css" media="screen" />', "\n" ) + cat( "\t", '<title>QDNAseq Report | ', outputName,'</title>', "\n", sep='' ) cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' ) cat( "\t<style>\n", sep='') ## include CSS into html file, makes it more portable - cat( paste( "\t\t", '/* the css here originates from ', CSS_FILE,' */', "\n"), sep='' ) cat( "\t\t", readLines( CSS_FILE ), sep="\n\t\t" ) #cat( "\t\th1 {color:red;}", "\n") cat( "\n\t</style>\n" ) @@ -398,8 +389,10 @@ cat( '<h3 class="qdnaseq">Settings</h3><p>', "\n") cat( '<table class="pure-table pure-table-striped"><tbody>' ) cat( htmlTableRow( c( "AnalysisName", outputName ) ) ) - cat( htmlTableRow( c( "AnalysisDate", as.character(Sys.time()) ) ) ) - cat( htmlTableRow( c( "BinSize (kb)", binSize ) ) ) + cat( htmlTableRow( c( "AnalysisStart", analysisStart ) ) ) + cat( htmlTableRow( c( "AnalysisEnd", analysisEnd ) ) ) + cat( htmlTableRow( c( "AnalysisTime", timeDiff ) ) ) + cat( htmlTableRow( c( "BinSizes (kbp)", paste(binSizes,collapse=", ") ) ) ) cat( htmlTableRow( c( "R info", rVersion ) ) ) cat( htmlTableRow( c( "QDNAseq info", qdnaseqVersion ) ) ) @@ -415,103 +408,83 @@ ## ------------------------ ## list with links to all output files ## ------------------------ - - cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n") - cat( '<dl>', "\n" ) - cat( '<dt>', htmlLink( path=robjReadCoName, robjReadCoName ), '</dt>', "\n" ) - cat( '<dd>QDNAseq object with read counts per bin, ', readCoSize,'</dd>', "\n" ) - + cat( '<p>This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='') + cat( '<table class="pure-table pure-table-striped">', "\n" ) + cat( '<thead><th>Type</th>', as.vector( mapply( paste, "<th>", binSizes, "kbp</th>", sep="" ) ),'</thead>', "\n" ) + cat( "<tbody>", "\n") + files <- list() + fileTypes <- c( 'ReadCounts.rds', 'CopyNumbers.rds' ) + if ( doSegment ){ fileTypes <- c( fileTypes, 'CopyNumbersSegmented.rds') } - cat( '<dt>', htmlLink( path=robjCopyNrName, robjCopyNrName ), '</dt>', "\n" ) - cat( '<dd>QDNAseq object with copy numbers per bin, ', copyNrSize,'</dd>', "\n" ) - - cat( '<dt>', htmlLink( path=igvCopyNrName, igvCopyNrName ), '</dt>', "\n" ) - cat( '<dd>IGV formatted text file with copy numbers per bin, ', igvCopyNrSize,'</dd>', "\n" ) - - if ( doCall ){ - cat( '<dt>', htmlLink( path=robjCalledName, robjCalledName ), '</dt>', "\n" ) - cat( '<dd>QDNAseq object with segment and call status per bin, ', calledSize,'</dd>', "\n" ) - - cat( '<dt>', htmlLink( path=regiOutputName, regiOutputName ), '</dt>', "\n" ) - cat( '<dd>list with segmented/called regions for each sample, ', regionSize, '</dd>', "\n" ) - } - cat( '</dl></p>', "\n" ) - - r_code <- '## start own downstream analysis in R with:' - r_code <- c( r_code, 'library( QDNAseq )' ) - r_code <- c( r_code, paste( 'readRDS( ', robjReadCoName, ' ) -> readCounts', sep="") ) - - ## r_code shows example code on how to load output files - #cat( r_code, "</p>\n", sep="\n") - cat( '<p>See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='' ) - cat( '<pre>', paste( r_code, collapse="\n" ), '</pre>', "\n", sep='' ) + for ( fileType in fileTypes ){ + fileNames <- mapply( paste, binSizes, paste( 'kbp_QDNAseq', fileType, sep=''), sep='') + fileLinks <- mapply( htmlLink, fileNames, paste( binSizes, "kbp", sep="" ) ) + cat( htmlTableRow( c( fileType, fileLinks ) ) ) + } + cat( "\n</tbody></table></p>", "\n") ## ------------------------ ## table with links to files ## ------------------------ + ratio <- PLOT_WIDTH / PLOT_HEIGHT + width <- 960; height <- width / ratio ## bigger img + width_t <- 100; height_t <- 40 ## thumb img + cat( '<h3 class="qdnaseq">Results: overview</h3><p>', "\n") + cat( '<p>This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is a more exprimental step).</p>', "\n", sep='') plots_html <- '' - - cat( '<table class="pure-table pure-table-striped"><thead><tr><th>Sample / File</th><th>CopyNr</th><th>Calls</th><th>Reads</th><th>Regions</th><th>Files</th></tr></thead><tbody>' ) - - for ( bam_file in sampleNames ){ + + colspan <- 1 + binHeader <- "<th>Image</th>" + if ( doSegment ){ # extra column with segment info + colspan <- 2 + binHeader <- "<th>Image</th><th>Segments</th>" + } + cat( '<table class="pure-table pure-table-striped">', "\n" ) + cat( '<thead><tr><th></th><th></th>', as.vector( mapply( paste, "<th colspan=\"", colspan,"\">", binSizes, "kbp</th>", sep="" ) ), '</tr></thead>' ) + cat( '<thead><tr><th>Sample / File</th><th>Reads</th>', rep( binHeader, length(binSizes) ), '</tr></thead>' ) + cat( '<tbody>' ) + + for ( bam_file in bamsNames ){ - #width <- 600; height <- 240 - width <- PLOT_WIDTH; height <- PLOT_HEIGHT - width_t <- 100; height_t <- 40 - - ## add thumbnails to table with links to anchors on html page - copy_img <- plotted_images[[ bam_file ]][[ 'CopyNumbers' ]] - usedReads <- plotted_images[[ bam_file ]][[ 'usedReads' ]] + usedReads <- plotted_images[[ paste(binSize, bam_file, 'usedReads', sep="_" ) ]] usedReads <- format( as.integer(usedReads), digits=4, decimal.mark=".", big.mark="," ) + htmlRow <- paste( '<tr><td>', bam_file, '</td><td>', usedReads, '</td>', sep='' ) - html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('<img src="',copy_img,'" alt="', bam_file, '" width="', width_t, '" height="', height_t, '">', sep='') ) - html_copy_img <- htmlLink( path=copy_img, paste('<img id="', copy_img,'" src="',copy_img,'" alt="',bam_file, '" width="', width, '" height="', height, '">', sep='') ) - html_call_thumb <- 'NA' - html_call_img <- '' - html_bedGraph <- 'NA' - region_count <- 'NA' + for ( binSize in binSizes ){ + + ## add thumbnails to table with links to anchors on html page + copy_img <- plotted_images[[ paste(binSize, bam_file, 'CopyNumbers', sep="_" ) ]] + html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('<img src="',copy_img,'" alt="', bam_file, '" width="', width_t, '" height="', height_t, '">', sep='') ) + html_copy_img <- htmlLink( path=copy_img, paste('<img id="', copy_img,'" src="',copy_img,'" alt="',bam_file, '" width="', width, '" height="', height, '">', sep='') ) + html_segm_img <- '' + html_bedGraph <- '' + region_count <- '' + htmlRow <- paste( htmlRow, '<td>', html_copy_thumb, '</td>' ) - if ( doCall ){ - call_img <- plotted_images[[ bam_file ]][[ 'Called' ]] - region_count <- plotted_images[[ bam_file ]][[ 'region_count' ]] - html_call_thumb <- htmlLink( path=paste('#', call_img, sep=''), paste('<img src="', call_img, '" alt="', bam_file, '" width="', width_t,'" height="', height_t,'">', sep='') ) + if ( doSegment ){ + segm_img <- plotted_images[[ paste(binSize, bam_file, 'Segmented', sep="_" ) ]] + region_count <- plotted_images[[ paste(binSize, bam_file, 'region_count', sep="_" ) ]] - ## list structure with output files just is an array per BAM - html_bedGraph <- htmlLink( path=printedFiles[[ bam_file ]][1], 'bedGraph' ) - html_call_img <- htmlLink( path=call_img, paste('<img id="', call_img,'" src="', call_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') ) + html_bedGraph <- htmlLink( path=plotted_images[[ paste(binSize, bam_file, 'bedgraph', sep="_" ) ]], 'bedGraph' ) + html_segm_img <- htmlLink( path=segm_img, paste('<img id="', segm_img,'" src="', segm_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') ) + htmlRow <- paste( htmlRow, '<td>', region_count, ' (', html_bedGraph, ')</td>', sep="" ) + } + plots_html <- paste( plots_html, html_copy_img, "\n", html_segm_img, "\n<br \\>\n", sep='' ) } - + plots_html <- paste( plots_html, "\n<hr \\>\n", sep='' ) ## add info to overview table, including small thumbnails - cat( htmlTableRow( c(bam_file, html_copy_thumb, html_call_thumb, usedReads, region_count, paste( html_bedGraph, sep=", ")) ), "\n" ) - ## now include (large) images in html page - plots_html <- paste( plots_html, html_copy_img, "\n", html_call_img, "\n<hr \\>\n", sep='' ) + htmlRow <- paste( htmlRow, '</tr>', sep='' ) + cat( htmlRow, "\n" ) } cat( "</tbody></table></p>", "\n") - - ## ------------------------ - ## create the noise and frequency plots - ## ------------------------ - html_noise_img <- htmlLink( - path=noiseImgName, - paste('<img id="', noiseImgName,'" src="', noiseImgName, '" width="', height/2, '" height="', height/2, '" alt="NoisePlot">', sep='') - ) - html_freq_img <- htmlLink( - path=freqImgName, - paste('<img id="', freqImgName,'" src="', freqImgName, '" width="', width/2, '" height="', height/2, ' alt="FrequenceyPlot">', sep='') - ) - ## there only is a frequency plot when data is called - if ( !doCall ){ html_freq_img <- '' } - - extra_plots_html <- paste( html_noise_img, " \n", html_freq_img, "\n", sep='' ) ## ------------------------ ## section with various output shown ## ------------------------ - cat( '<h3 class="qdnaseq">Results: Analysis plots</h3><p>', "\n") - cat( extra_plots_html, "\n") cat( '<h3 class="qdnaseq">Results: Sample plots</h3><p>', "\n") + ## now include (large) images in html page cat( plots_html, "\n") cat( "\n</p></body>\n") cat( "\n</html>\n") @@ -536,8 +509,6 @@ cat( '<p>Explore <a href="', htmlOutputName, '" class="button">the results</a> directly within galaxy</p>', "\n", sep="") cat( '<p>Or download a <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" ) - cat( '<p>The zip file contains all output files, including *.rds files allowing you to load the R copyNumber object(s) and perform further detailed analysis or create your own output for further processing. You can load the rds file with <code class="code">loadRDS(file.rds)</code></p>', "\n", sep="") - sink() ## ------------------------