# HG changeset patch # User stef # Date 1408547801 14400 # Node ID 2f0af8970aea8791be182bbe2ff578f9675137a1 # Parent 19e91eb3d06bcd2b49e581823979d743b74a4502 Uploaded diff -r 19e91eb3d06b -r 2f0af8970aea QDNAseq.R --- a/QDNAseq.R Wed Aug 20 11:16:26 2014 -0400 +++ b/QDNAseq.R Wed Aug 20 11:16:41 2014 -0400 @@ -54,63 +54,51 @@ ## -------------------- ## copied code for extracting the regions by segment call status ## -------------------- -fuseRegions <- function( cgh, onlyCalled=T ) { - if ( ncol(cgh) > 1 ) stop('Please specify which sample...') +fuseRegions <- function( obj, minRatio=0 ) { + if ( ncol(obj) > 1 ) stop('Please specify which sample...') - data <- data.frame( cgh@featureData@data[,1:3], calls(cgh), copynumber(cgh), segmented(cgh), check.names=FALSE, stringsAsFactors=FALSE) - colnames( data ) <- c( "chr", "start", "end", "call", "log2", "segmentval" ) + data <- data.frame( obj@featureData@data[,1:3], copynumber(obj), segmented(obj), check.names=FALSE, stringsAsFactors=FALSE) + colnames( data ) <- c( "chr", "start", "end", "log2", "segmentval" ) fused.data <- data.frame() curr.bin <- 1 for ( chr in unique( data$chr ) ) { - chr.data <- data[ data$chr == chr, ] prev.bin <- curr.bin - prev.call <- chr.data[ 1, 'call' ] prev.log2 <- chr.data[ 1, 'log2' ] prev.segm <- chr.data[ 1, 'segmentval' ] start <- chr.data[ 1, 'start' ] if ( nrow(chr.data) > 1) { for ( i in 2:nrow(chr.data) ) { - curr.bin <- curr.bin + 1 - curr.call <- chr.data[ i, 'call'] curr.segm <- chr.data[ i, 'segmentval'] if ( curr.segm != prev.segm ) { - - fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) - if ( prev.call != 0 ){ - catMsg( c(" ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")" ) ) - } - prev.call <- curr.call + fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) ) prev.segm <- curr.segm prev.bin <- curr.bin start <- chr.data[ i, 'start'] } } - fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) + fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) ) }else{ - fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) + fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) ) } } - ## if requested remove the regions with call status 0 (= normal) - if ( onlyCalled == T ){ - fused.data <- fused.data[ fused.data$call != 0, ] - } + ## remove regions with low amplitude + fused.data <- fused.data[ abs(fused.data$segmentval) >= minRatio, ] fused.data } ## DESC: takes the output of fuse.regions and outputs a txt file per sample -outputRegionsFromList <- function ( regionsList, outputBasename, outputDir="./" ){ +outputRegionsFromList <- function ( regionsList, outputBasename, outputDir="./", binSize, storeList ){ if ( missing(regionsList) ) stop( 'Please provide regionsList...' ) if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' ) if ( !is.list(regionsList) ) stop( 'Input not a list...?' ) if ( length(regionsList) < 1 ) stop( 'List seems empty...?' ) if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") ) else dir.create( outputDir ) - outFiles <- list() ## have to set R output options otherwise scientific method is used at some point options( "scipen"=100 ) @@ -125,21 +113,23 @@ catMsg( c(" Working on sample ", sample ) ) regionCount <- nrow( regionsList[[sample]] ) - outSampleBase <- paste( outputBasename, '_', sample, '_QDNAseqRegions', sep='') + outSampleBase <- paste( outputBasename, '_', sample, '_', binSize, 'kbp', sep='') outBedgraphFile <- paste( outSampleBase, '.bedGraph', sep="" ) outBedgraphPath <- paste( outputDir, '/', outBedgraphFile, sep="" ) ## ---------- BEDGRAPH ---------- - txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample,"_segmReg description=segmented_regions_from_QDNAseq\n", sep="") + txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample," description=segmented_regions_from_QDNAseq_",binSize,"kbp\n", sep="") sink( outBedgraphPath ) cat( txt ) sink() write.table( regionsList[[sample]][,bedgraphColumns], outBedgraphPath, quote=F, sep="\t", row.names=F, append=T, col.names=F) - outFiles[[sample]] <- c( outBedgraphFile ) + #outFiles[[sample]] <- c( outBedgraphFile ) + storeList[[ paste( binSize, sample, 'bedgraph', sep="_")]] <- outBedgraphFile } - outFiles + return(storeList) } + ## ================================================== ## Start of analysis ## ================================================== @@ -149,7 +139,6 @@ DECIMALS <- 3 WEB_LINK <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html' PURE_CSS <- 'http://yui.yahooapis.com/pure/0.5.0/pure-min.css' -PAR_SET <- list( pch=22 ) catMsg( "Starting QDNAseq wrapper" ) catMsg( "Loading R libraries" ) @@ -164,190 +153,197 @@ ## many variables are imported via sourced "config" source( config ) -PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 # desparate try to make png text scale well, damn you R...! -if ( doOutputCallsRds == TRUE ){ doCall <- TRUE } +## desparate tries to make png text scale well, damn you R...! +PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 +#PLOT_PS <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 40 +#TXT_FACTOR <- PLOT_WIDTH/480 +#PAR_SET <- list( pch=22, cex.lab=TXT_FACTOR, cex.main=TXT_FACTOR ) +PAR_SET <- list( pch=22 ) systemUser <- system("whoami",T) qdnaseqVersion <- packageDescription( "QDNAseq" )$Version rVersion <- R.version.string -catMsg( c("Analysis run with user: ", systemUser ) ) -catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) ) +startTime <- Sys.time() +analysisStart <- as.character( startTime ) +catMsg( c("QDNAseq version: ", qdnaseqVersion) ) catMsg( c( rVersion ) ) ## get the comma separated list of chromosomes to exclude excludeChrs <- unlist( strsplit( excludeChrsString, ",") ) +binSizes <- as.numeric( unlist( strsplit( binSizesString, ",") ) ) + ## ------------------------ ## DEBUG -#catMsg( "PARAM" ) -#catMsg( galaxy_path ) -#catMsg( repos_path ) -#catMsg( instal_path ) +if ( debug ){ + catMsg( c("Analysis run under user: ", systemUser ) ) + catMsg( c("Here comes sessionInfo: " ) ) + print( sessionInfo() ) +} ## /DEBUG ## ------------------------ -## help msg for running script without wrapper still needs work! -#if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ -# catMsg( c( "Usage: ", paste(params_help,sep=",") ) ) -# quit( save = 'no', status=0, runLast=F ) -#} - +## prepare output dir if ( !file.exists( outputPath) ){ dir.create( outputPath ) } -## because we circumvent params that galaxy can save, we want to ## copy source config file to output dir to include it in output zip -file.copy( config, paste(outputPath, 'qdnaseq_config_file.R', sep='/') ) - -## ------------------------ -## construct output file-names and -paths -## ------------------------ -htmlOutputName <- 'index.html' -gzipOutputName <- paste( 'QDNAseqResults_', binSize, 'kbp_', outputName, '.zip', sep='' ) -robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='') -robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='') -robjCalledName <- paste( binSize, 'kbp_QDNAseqCalledSegments.rds', sep='') -regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') -igvCopyNrName <- paste( binSize, 'kbp_QDNAseq-normalized.igv', sep='') -noiseImgName <- paste( 'QDNAseqNoisePlot.png', sep='') -freqImgName <- paste( 'QDNAseqFrequencyPlot.png', sep='') - -gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='') -htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='') -robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') -robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') -robjCalledPath <- paste( outputPath, '/', robjCalledName, sep='') -robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') -igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') -noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='' ) -freqImgPath <- paste( outputPath, '/', freqImgName, sep='' ) +file.copy( config, paste(outputPath, 'galaxyConfigFile.R', sep='/') ) ## setup bam filelist for easy retrieval later fileList <- makeBamFileList( bamsPaths, bamsNames ) bamCount <- length( fileList[[ 'all_paths' ]] ) -## ------------------------ -## performing QDNAseq analysis steps +gzipOutputName <- paste( 'QDNAseqResults_', outputName, '.zip', sep='' ) +gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='') +htmlOutputName <- 'index.html' +htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='') + +plotted_images <- list() # to keep track of images for later linking +regions <- list() # will contain the segments +outputFiles <- list() + ## ------------------------ -if ( debug ){ ## in case of debug just use inbuilt LGG data for speedup - data(LGG150) - readCounts <- LGG150 - binSize <- 15 - bamsPaths <- c( "BUILD_IN_DATA_LGG150") +## in case of debug just use inbuilt LGG data for speedup +if ( debug ){ + binSizes <- c(15) + bamsPaths <- c( "BUILD_IN_DATA") bamsNames <- c( "LGG150") fileList <- makeBamFileList( bamsPaths, bamsNames ) bamCount <- length( fileList[[ 'all_paths' ]] ) -}else{ - if ( nchar(binAnnotations) == 0 ){ - binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType ) - }else{ - ## if user provided file, check if correct class - if ( class(binAnnotations)[1] != 'AnnotatedDataFrame' ){ - stop( "Provided binAnnotations file is not of class 'AnnotatedDataFrame'\n" ) - } - binAnnotations <- readRDS( binAnnotations ) - } - ## provide bamnames because in galaxy everyting is called "dataset_###" - readCounts <- binReadCounts( binAnnotations, bamfiles=fileList[[ 'all_paths' ]], bamnames=bamsNames ) } -readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs ) -readCountsFiltered <- estimateCorrection( readCountsFiltered ) -copyNumbers <- correctBins( readCountsFiltered ) -copyNumbersNormalized <- normalizeBins( copyNumbers ) -copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) -sampleNames <- readCountsFiltered@phenoData@data$name +for ( binSize in binSizes ){ + + ## ------------------------ + ## construct output file-names and -paths + ## ------------------------ + robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='') + robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='') + igvCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='') + robjSegmntName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='') + regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') + noiseImgName <- paste( binSize, 'kbp_QDNAseqNoiseplot.png', sep='') + + robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') + robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') + robjSegmntPath <- paste( outputPath, '/', robjSegmntName, sep='') + robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') + igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') + noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='') + + binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType ) + + ## in case of debug just use inbuilt LGG data for speedup + if ( debug ){ + data(LGG150) + readCounts <- LGG150 + }else{ + ## provide bamnames because in galaxy everyting is called "dataset_###" + readCounts <- binReadCounts( binAnnotations, bamfiles=fileList[[ 'all_paths' ]], bamnames=fileList[[ 'all_names' ]] ) + } -## save objects to output dir -saveRDS( readCountsFiltered, robjReadCoPath ); -saveRDS( copyNumbersSmooth, robjCopyNrPath ); -exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" ) + readCountsFiltered <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs ) + readCountsFiltered <- estimateCorrection( readCountsFiltered ) + copyNumbers <- correctBins( readCountsFiltered ) + copyNumbersNormalized <- normalizeBins( copyNumbers ) + copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) + sampleNames <- readCountsFiltered@phenoData@data$name + + ## save objects to output dir + saveRDS( readCountsFiltered, robjReadCoPath ); + saveRDS( copyNumbersSmooth, robjCopyNrPath ); + exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" ) + + ## also save objects for galaxy history output if requested + if ( doOutputCopynumbersIgv ){ + #@ a bit hacky galaxy way to allow an unknown number of output files based on param selection + #@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files + historyName <- paste(binSize, 'kbp-IGV', sep="") + igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" ) + exportBins( copyNumbersSmooth, file=igvFile, format="igv" ) + catMsg( c("Exported igv file to history for ", binSize, "kbp bin") ) + } + + ## proceed with calling if requested + if ( doSegment ){ + copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD ) + copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented ) + cgh <- makeCgh( copyNumbersSegmented ) + saveRDS( copyNumbersSegmented, robjSegmntPath ); + } -## also save objects for galaxy history output if requested -if ( doOutputReadcountsRds ){ - saveRDS( readCountsFiltered, readCountsDatasetFile ); -} -if ( doOutputCopynumbersRds ){ - saveRDS( copyNumbersSmooth, copyNumbersDatasetFile ); -} -if ( doOutputCopynumbersIgv ){ - saveRDS( copyNumbersSmooth, copyNumbersIgvDatasetFile ); -} + ## ------------------------ + ## create output files + ## ------------------------ + png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES ); + par( PAR_SET ) + noisePlot( readCountsFiltered, main=paste( "Noise Plot ", binSize, "kbp", sep=''), col="darkgreen" ) + dev.off() + + binSize <- as.character( binSize ) # to avoid R using it as array index... (*#$^@ you R!) + binSizeString <- paste( binSize, 'kbp', sep='') + + for (i in 1:length(sampleNames) ){ + + sample <- sampleNames[i] + usedReads <- readCountsFiltered@phenoData@data$used.reads[i] + catMsg( c("Creating plots for sample: ", sample, " (", binSizeString, ")" ) ) + + type <- 'CopyNumbers' + img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') + img_file_path <- paste( outputPath, '/', img_file, sep='' ) -## proceed with calling if requested -if ( doCall ){ - copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD ) - copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented ) - copyNumbersCalled <- callBins( copyNumbersSegmented ) - cgh <- makeCgh( copyNumbersCalled ) - saveRDS( copyNumbersCalled, robjCalledPath ); - if ( doOutputCallsRds ){ - saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); + ## COPYNUMBER PLOT + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + par( PAR_SET ) + plot( copyNumbersSmooth[ ,sample ], main=paste(sample, ": CopyNumbers", sep="") ); + mtext( paste( "(", binSizeString, " bins)" sep=""), 3 ) + abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + dev.off() + + plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file + + if ( doSegment ){ + type <- 'Segmented' + img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') + img_file_path <- paste( outputPath, '/', img_file, sep='' ) + + ## COPYNUMBER PLOT + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + par( PAR_SET ) + plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers and Segments", sep="") ); + mtext( paste( "(", binSizeString, " bins)" sep=""), 3 ) + abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + dev.off() + + plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file + + catMsg( c(" Fusing regions of sample: ", sample) ) + regions[[ sample ]] <- fuseRegions( cgh[, sample] ) + + region_count <- nrow( data.frame( regions[[ sample ]] ) ) + catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) + plotted_images[[ paste(binSize, sample, 'region_count', sep="_" ) ]] <- region_count + } + + ## add USED read counts + plotted_images[[ paste(binSize, sample, 'usedReads', sep="_" ) ]] <- usedReads } - ## actual calls are not included in exportBins output, so not used now - #exportBins( copyNumbersCalled, file=igvCalledPath, format="igv") -} + if ( doSegment ){ + saveRDS( regions, robjRegionPath ) + plotted_images <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath, binSize=binSize, storeList=plotted_images ) + } +}# end bin -## ------------------------ -## create output files -## ------------------------ -plotted_images <- list() # to keep track of images for later linking -regions <- list() # will contain the (called) segments - -png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - noisePlot( readCountsFiltered, col="darkgreen" ) -dev.off() - -if ( doCall ){ - png( freqImgPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - frequencyPlot( copyNumbersCalled ) - dev.off() -} - -for (i in 1:length(sampleNames) ){ - sample <- sampleNames[i] - usedReads <- readCountsFiltered@phenoData@data$used.reads[i] - catMsg( c("Creating plots for sample: ", sample ) ) +## ----- debug ----- +#catMsg( "done" ) +#q(status=0) +## ---- /debug ----- - type <- 'CopyNumbers' - img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') - img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - plot( copyNumbersSmooth[ ,sample ] ); - dev.off() - plotted_images[[ sample ]][[ type ]] <- img_file - - if ( doCall ){ - type <- 'Called' - img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') - img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); - par( PAR_SET ) - plot( copyNumbersCalled[ ,sample ] ); - dev.off() - plotted_images[[ sample ]][[ type ]] <- img_file - - catMsg( c(" Fusing regions of sample: ", sample) ) - regions[[ sample ]] <- fuseRegions( cgh[, sample] ) - region_count <- nrow( data.frame( regions[[ sample ]] ) ) - catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) - plotted_images[[ sample ]][[ 'region_count' ]] <- region_count - } - - ## add USED read counts - plotted_images[[ sample ]][[ 'usedReads' ]] <- usedReads -} - -if ( doCall ){ - saveRDS( regions, robjRegionPath ) - printedFiles <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath ) -} ## ------------------------ ## prepare output @@ -359,27 +355,22 @@ ## ------------------------ ## get filesizes for report ## ------------------------ -zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -readCoSize <- paste( round( file.info( robjReadCoPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -copyNrSize <- paste( round( file.info( robjCopyNrPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -calledSize <- paste( round( file.info( robjCalledPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -regionSize <- paste( round( file.info( robjRegionPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) -igvCopyNrSize <- paste( round( file.info( igvCopyNrPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' ) +endTime <- Sys.time() +timeDiff <- format( round( endTime - startTime, 3 ) ) +analysisEnd <- as.character( endTime ) ## ------------------------ ## creating html output to be linked to from the middle galaxy pane ## ------------------------ - -#sink( file = outputHtml, type = "output" ) sink( file = htmlOutputPath, type = "output" ) cat( "\n") cat( "\n") - #cat( '', "\n" ) + cat( "\t", 'QDNAseq Report | ', outputName,'', "\n", sep='' ) cat( "\t", '', "\n", sep='' ) cat( "\t\n" ) @@ -398,8 +389,10 @@ cat( '

Settings

', "\n") cat( '' ) cat( htmlTableRow( c( "AnalysisName", outputName ) ) ) - cat( htmlTableRow( c( "AnalysisDate", as.character(Sys.time()) ) ) ) - cat( htmlTableRow( c( "BinSize (kb)", binSize ) ) ) + cat( htmlTableRow( c( "AnalysisStart", analysisStart ) ) ) + cat( htmlTableRow( c( "AnalysisEnd", analysisEnd ) ) ) + cat( htmlTableRow( c( "AnalysisTime", timeDiff ) ) ) + cat( htmlTableRow( c( "BinSizes (kbp)", paste(binSizes,collapse=", ") ) ) ) cat( htmlTableRow( c( "R info", rVersion ) ) ) cat( htmlTableRow( c( "QDNAseq info", qdnaseqVersion ) ) ) @@ -415,103 +408,83 @@ ## ------------------------ ## list with links to all output files ## ------------------------ - - cat( '

Output files

', "\n") - cat( '

', "\n" ) - cat( '
', htmlLink( path=robjReadCoName, robjReadCoName ), '
', "\n" ) - cat( '
QDNAseq object with read counts per bin, ', readCoSize,'
', "\n" ) - + cat( '

This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files

', "\n", sep='') + cat( '
', "\n" ) + cat( '', as.vector( mapply( paste, "", sep="" ) ),'', "\n" ) + cat( "", "\n") + files <- list() + fileTypes <- c( 'ReadCounts.rds', 'CopyNumbers.rds' ) + if ( doSegment ){ fileTypes <- c( fileTypes, 'CopyNumbersSegmented.rds') } - cat( '
', htmlLink( path=robjCopyNrName, robjCopyNrName ), '
', "\n" ) - cat( '
QDNAseq object with copy numbers per bin, ', copyNrSize,'
', "\n" ) - - cat( '
', htmlLink( path=igvCopyNrName, igvCopyNrName ), '
', "\n" ) - cat( '
IGV formatted text file with copy numbers per bin, ', igvCopyNrSize,'
', "\n" ) - - if ( doCall ){ - cat( '
', htmlLink( path=robjCalledName, robjCalledName ), '
', "\n" ) - cat( '
QDNAseq object with segment and call status per bin, ', calledSize,'
', "\n" ) - - cat( '
', htmlLink( path=regiOutputName, regiOutputName ), '
', "\n" ) - cat( '
list with segmented/called regions for each sample, ', regionSize, '
', "\n" ) - } - cat( '

', "\n" ) - - r_code <- '## start own downstream analysis in R with:' - r_code <- c( r_code, 'library( QDNAseq )' ) - r_code <- c( r_code, paste( 'readRDS( ', robjReadCoName, ' ) -> readCounts', sep="") ) - - ## r_code shows example code on how to load output files - #cat( r_code, "

\n", sep="\n") - cat( '

See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files

', "\n", sep='' ) - cat( '
', paste( r_code, collapse="\n" ), '
', "\n", sep='' ) + for ( fileType in fileTypes ){ + fileNames <- mapply( paste, binSizes, paste( 'kbp_QDNAseq', fileType, sep=''), sep='') + fileLinks <- mapply( htmlLink, fileNames, paste( binSizes, "kbp", sep="" ) ) + cat( htmlTableRow( c( fileType, fileLinks ) ) ) + } + cat( "\n
Type", binSizes, "kbp

", "\n") ## ------------------------ ## table with links to files ## ------------------------ + ratio <- PLOT_WIDTH / PLOT_HEIGHT + width <- 960; height <- width / ratio ## bigger img + width_t <- 100; height_t <- 40 ## thumb img + cat( '

Results: overview

', "\n") + cat( '

This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is a more exprimental step).

', "\n", sep='') plots_html <- '' - - cat( '' ) - - for ( bam_file in sampleNames ){ + + colspan <- 1 + binHeader <- "" + if ( doSegment ){ # extra column with segment info + colspan <- 2 + binHeader <- "" + } + cat( '
Sample / FileCopyNrCallsReadsRegionsFiles
ImageImageSegments
', "\n" ) + cat( '', as.vector( mapply( paste, "", sep="" ) ), '' ) + cat( '', rep( binHeader, length(binSizes) ), '' ) + cat( '' ) + + for ( bam_file in bamsNames ){ - #width <- 600; height <- 240 - width <- PLOT_WIDTH; height <- PLOT_HEIGHT - width_t <- 100; height_t <- 40 - - ## add thumbnails to table with links to anchors on html page - copy_img <- plotted_images[[ bam_file ]][[ 'CopyNumbers' ]] - usedReads <- plotted_images[[ bam_file ]][[ 'usedReads' ]] + usedReads <- plotted_images[[ paste(binSize, bam_file, 'usedReads', sep="_" ) ]] usedReads <- format( as.integer(usedReads), digits=4, decimal.mark=".", big.mark="," ) + htmlRow <- paste( '', sep='' ) - html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('', bam_file, '', sep='') ) - html_copy_img <- htmlLink( path=copy_img, paste('',bam_file, '', sep='') ) - html_call_thumb <- 'NA' - html_call_img <- '' - html_bedGraph <- 'NA' - region_count <- 'NA' + for ( binSize in binSizes ){ + + ## add thumbnails to table with links to anchors on html page + copy_img <- plotted_images[[ paste(binSize, bam_file, 'CopyNumbers', sep="_" ) ]] + html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('', bam_file, '', sep='') ) + html_copy_img <- htmlLink( path=copy_img, paste('',bam_file, '', sep='') ) + html_segm_img <- '' + html_bedGraph <- '' + region_count <- '' + htmlRow <- paste( htmlRow, '' ) - if ( doCall ){ - call_img <- plotted_images[[ bam_file ]][[ 'Called' ]] - region_count <- plotted_images[[ bam_file ]][[ 'region_count' ]] - html_call_thumb <- htmlLink( path=paste('#', call_img, sep=''), paste('', bam_file, '', sep='') ) + if ( doSegment ){ + segm_img <- plotted_images[[ paste(binSize, bam_file, 'Segmented', sep="_" ) ]] + region_count <- plotted_images[[ paste(binSize, bam_file, 'region_count', sep="_" ) ]] - ## list structure with output files just is an array per BAM - html_bedGraph <- htmlLink( path=printedFiles[[ bam_file ]][1], 'bedGraph' ) - html_call_img <- htmlLink( path=call_img, paste('', bam_file, '', sep='') ) + html_bedGraph <- htmlLink( path=plotted_images[[ paste(binSize, bam_file, 'bedgraph', sep="_" ) ]], 'bedGraph' ) + html_segm_img <- htmlLink( path=segm_img, paste('', bam_file, '', sep='') ) + htmlRow <- paste( htmlRow, '', sep="" ) + } + plots_html <- paste( plots_html, html_copy_img, "\n", html_segm_img, "\n
\n", sep='' ) } - + plots_html <- paste( plots_html, "\n
\n", sep='' ) ## add info to overview table, including small thumbnails - cat( htmlTableRow( c(bam_file, html_copy_thumb, html_call_thumb, usedReads, region_count, paste( html_bedGraph, sep=", ")) ), "\n" ) - ## now include (large) images in html page - plots_html <- paste( plots_html, html_copy_img, "\n", html_call_img, "\n
\n", sep='' ) + htmlRow <- paste( htmlRow, '', sep='' ) + cat( htmlRow, "\n" ) } cat( "
", binSizes, "kbp
Sample / FileReads
', bam_file, '', usedReads, '', html_copy_thumb, '', region_count, ' (', html_bedGraph, ')

", "\n") - - ## ------------------------ - ## create the noise and frequency plots - ## ------------------------ - html_noise_img <- htmlLink( - path=noiseImgName, - paste('NoisePlot', sep='') - ) - html_freq_img <- htmlLink( - path=freqImgName, - paste('', sep='') - ) - ## there only is a frequency plot when data is called - if ( !doCall ){ html_freq_img <- '' } - - extra_plots_html <- paste( html_noise_img, " \n", html_freq_img, "\n", sep='' ) ## ------------------------ ## section with various output shown ## ------------------------ - cat( '

Results: Analysis plots

', "\n") - cat( extra_plots_html, "\n") cat( '

Results: Sample plots

', "\n") + ## now include (large) images in html page cat( plots_html, "\n") cat( "\n

\n") cat( "\n\n") @@ -536,8 +509,6 @@ cat( '

Explore the results directly within galaxy

', "\n", sep="") cat( '

Or download a zipfile with all output (', zippedSize, ')

', "\n", sep="" ) - cat( '

The zip file contains all output files, including *.rds files allowing you to load the R copyNumber object(s) and perform further detailed analysis or create your own output for further processing. You can load the rds file with loadRDS(file.rds)

', "\n", sep="") - sink() ## ------------------------