Mercurial > repos > stef > qdnaseq
diff QDNAseq.R @ 78:81ba2f857fe2 draft
Uploaded
author | stef |
---|---|
date | Wed, 04 Mar 2015 08:42:14 -0500 |
parents | 4588384edba1 |
children | 05e5358b8828 |
line wrap: on
line diff
--- a/QDNAseq.R Thu Nov 06 05:48:52 2014 -0500 +++ b/QDNAseq.R Wed Mar 04 08:42:14 2015 -0500 @@ -131,14 +131,21 @@ return(storeList) } +## ================================================== +## Unused but potential usefull code +## ================================================== +#@ a bit hacky galaxy way to allow an unknown number of output files based on param selection +#@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files +#historyName <- paste(binSize, 'kbp-IGV', sep="") +#igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" ) ## ================================================== ## Start of analysis ## ================================================== MAIN_NAME <- '[INFO] ' catMsg( "Starting QDNAseq wrapper" ) +#catMsg( R.version.string ) catMsg( "Loading R libraries" ) -catMsg( R.version.string ) ## supress msg to allow R to finish with non-error msg suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) ) @@ -157,6 +164,9 @@ ## many variables are imported via sourced "config" source( config ) +## if calling requested we always need segmenting first as well +if ( doCall ){ doSegment <- TRUE } + ## desparate tries to make png text scale well, damn you R...! PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 PAR_SET <- list( pch=22 ) @@ -171,7 +181,15 @@ ## get the comma separated list of chromosomes to exclude excludeChrs <- unlist( strsplit( excludeChrsString, ",") ) -binSizes <- as.numeric( unlist( strsplit( binSizesString, ",") ) ) + +## format binSizes back to integers because stupid galaxy doesn't do what I want +#print( binSizesString ) +binSizes <- gsub( 'kb', '', binSizesString ) # remove the kb string to get integers +#print( binSizes ) +binSizes <- gsub( 'bin', '', binSizes ) # remove the kb string to get integers +#print( binSizes ) +binSizes <- as.numeric( unlist( strsplit( binSizes, ",") ) ) +#print( binSizes ) ## ------------------------ @@ -205,11 +223,11 @@ plotted_images <- list() # to keep track of images for later linking regions <- list() # will contain the segments -outputFiles <- list() ## ------------------------ ## in case of debug just use inbuilt LGG data for speedup if ( debug ){ + catMsg( c("Built in data only contains binsize 15kb so overriding chosen binSizes to single 15kb") ) binSizes <- c(15) bamsPaths <- c( "BUILD_IN_DATA") bamsNames <- c( "LGG150") @@ -219,22 +237,23 @@ for ( binSize in binSizes ){ + catMsg( c("Starting analysis for binSize: ", binSize) ) ## ------------------------ ## construct output file-names and -paths ## ------------------------ - robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='') - robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='') - igvCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='') - robjSegmntName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='') + rdsReadName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='') + rdsCopyName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='') + rdsSegmName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='') + rdsCallName <- paste( binSize, 'kbp_QDNAseqCopyNumbersCalled.rds', sep='') + igvCopyName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='') + igvSegmName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.igv', sep='') + igvCallName <- paste( binSize, 'kbp_QDNAseqCopyNumbersCalled.igv', sep='') + regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') noiseImgName <- paste( binSize, 'kbp_QDNAseqNoiseplot.png', sep='') - robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') - robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') - robjSegmntPath <- paste( outputPath, '/', robjSegmntName, sep='') - robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') - igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') - noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='') + rdsRegiPath <- paste( outputPath, '/', regiOutputName, sep='') + noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='') binAnnFile <- paste( TOOL_PATH, '/static/binannotation/', binSize, 'kbp_binAnnotations.rds', sep="" ) if ( file.exists(binAnnFile) ){ @@ -246,7 +265,7 @@ ## in case of debug just use inbuilt LGG data for speedup if ( debug ){ - data(LGG150) + data( LGG150 ) readCounts <- LGG150 }else{ ## provide bamnames because in galaxy everyting is called "dataset_###" @@ -260,27 +279,49 @@ copyNumbersSmooth <- smoothOutlierBins( copyNumbersNormalized ) sampleNames <- readCountsFiltered@phenoData@data$name - ## save objects to output dir - saveRDS( readCountsFiltered, robjReadCoPath ); - saveRDS( copyNumbersSmooth, robjCopyNrPath ); - exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" ) + ## set file to output if output requested + outputData <- copyNumbersSmooth + outputType <- 'copynumber' + outputLogT <- TRUE + rdsReadPath <- paste( outputPath, '/', rdsReadName, sep='') + saveRDS( readCounts, rdsReadPath ); + rdsPath <- paste( outputPath, '/', rdsCopyName, sep='') + igvPath <- paste( outputPath, '/', igvCopyName, sep='') + + ## proceed with segmenting / calling if requested + if ( doSegment ){ + copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD, transformFun="sqrt" ) + copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented ) + outputData <- copyNumbersSegmented + outputType <- 'segments' + igvPath <- paste( outputPath, '/', rdsSegmName, sep='') + rdsPath <- paste( outputPath, '/', igvSegmName, sep='') + } + if ( doCall ){ + copyNumbersCalled <- callBins(copyNumbersSegmented) + outputData <- copyNumbersCalled + outputType <- 'calls' + outputLogT <- FALSE # call values should not be transformed at output + rdsPath <- paste( outputPath, '/', rdsCallName, sep='') + igvPath <- paste( outputPath, '/', igvCallName, sep='') + } + + ## save the QDNAseq objects and tsv file of highest level (calls or segments) + saveRDS( outputData, rdsPath ); + exportBins( outputData, file=igvPath, format="igv", type=outputType, logTransform=outputLogT ) ## also save objects for galaxy history output if requested - if ( doOutputCopynumbersIgv ){ - #@ a bit hacky galaxy way to allow an unknown number of output files based on param selection - #@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files - historyName <- paste(binSize, 'kbp-IGV', sep="") - igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" ) - exportBins( copyNumbersSmooth, file=igvFile, format="igv" ) - catMsg( c("Exported igv file to history for ", binSize, "kbp bin") ) + if ( txt2history ){ + fileId <- paste('txt_', binSize, sep='') + historyOutputPath <- historyOutputFiles[[ fileId ]] + catMsg( c("About to export igv/txt file to history for ", binSize, "kbp bin") ) + exportBins( outputData, file=historyOutputPath, format="igv", type=outputType, logTransform=outputLogT ) } - - ## proceed with calling if requested - if ( doSegment ){ - copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD ) - copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented ) - cgh <- makeCgh( copyNumbersSegmented ) - saveRDS( copyNumbersSegmented, robjSegmntPath ); + if ( rds2history ){ + fileId <- paste('rds_', binSize, sep='') + rdsHistoryOutputPath <- historyOutputFiles[[ fileId ]] + catMsg( c("About to export rds file to history for ", binSize, "kbp bin") ) + saveRDS( outputData, file=rdsHistoryOutputPath ) } ## ------------------------ @@ -293,6 +334,7 @@ binSize <- as.character( binSize ) # to avoid R using it as array index... (*#$^@ you R!) binSizeString <- paste( binSize, 'kbp', sep='') + cgh <- makeCgh( outputData ) # needed for fuseRegions function for (i in 1:length(sampleNames) ){ @@ -308,7 +350,7 @@ png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); par( PAR_SET ) plot( copyNumbersSmooth[ ,sample ], main=paste(sample, ": CopyNumbers", sep="") ) - mtext( paste( binSizeString, " bins", sep=""), 3 ) + mtext( paste( "(", binSizeString, " bins)", sep=""), 3 ) abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) dev.off() @@ -319,16 +361,17 @@ img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') img_file_path <- paste( outputPath, '/', img_file, sep='' ) - ## COPYNUMBER PLOT + ## COPYNUMBER + SEGMENTS PLOT png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); par( PAR_SET ) - plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers and Segments", sep="") ) + plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers (Segmented)", sep="") ) mtext( paste( "(", binSizeString, " bins)", sep=""), 3 ) abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) dev.off() plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file + ## if segmented we can also retrieve the segment locations catMsg( c(" Fusing regions of sample: ", sample) ) regions[[ sample ]] <- fuseRegions( cgh[, sample] ) @@ -336,13 +379,29 @@ catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) plotted_images[[ paste(binSize, sample, 'region_count', sep="_" ) ]] <- region_count } + + if ( doCall ){ + type <- 'Called' + img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') + img_file_path <- paste( outputPath, '/', img_file, sep='' ) + + ## COPYNUMBER + SEGMENTS + CALLS PLOT + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + par( PAR_SET ) + plot( copyNumbersCalled[ ,sample ], main=paste(sample, ": CopyNumbers (Segmented and Called)", sep="") ) + mtext( paste( "(", binSizeString, " bins)", sep=""), 3 ) + abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + dev.off() + + plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file + } ## add USED read counts plotted_images[[ paste(binSize, sample, 'usedReads', sep="_" ) ]] <- usedReads } if ( doSegment ){ - saveRDS( regions, robjRegionPath ) + saveRDS( regions, rdsRegiPath ) plotted_images <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath, binSize=binSize, storeList=plotted_images ) } }# end bin @@ -418,13 +477,20 @@ ## list with links to all output files ## ------------------------ cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n") - cat( '<p>This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='') + cat( '<p>This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files.</p>', "\n", sep='') cat( '<table class="pure-table pure-table-striped">', "\n" ) cat( '<thead><th>Type</th>', as.vector( mapply( paste, "<th>", binSizes, "kbp</th>", sep="" ) ),'</thead>', "\n" ) cat( "<tbody>", "\n") files <- list() - fileTypes <- c( 'ReadCounts.rds', 'CopyNumbers.rds' ) - if ( doSegment ){ fileTypes <- c( fileTypes, 'CopyNumbersSegmented.rds') } + #fileTypes <- c( 'ReadCounts.rds', 'CopyNumbers.rds' ) + fileTypes <- c( 'ReadCounts.rds' ) + if ( doCall ){ + fileTypes <- c( fileTypes, 'CopyNumbersCalled.rds') + }else if ( doSegment ){ + fileTypes <- c( fileTypes, 'CopyNumbersSegmented.rds') + }else { + fileTypes <- c( fileTypes, 'CopyNumbers.rds') + } for ( fileType in fileTypes ){ fileNames <- mapply( paste, binSizes, paste( 'kbp_QDNAseq', fileType, sep=''), sep='') @@ -441,7 +507,7 @@ width_t <- 100; height_t <- 40 ## thumb img cat( '<h3 class="qdnaseq">Results: overview</h3><p>', "\n") - cat( '<p>This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is a more exprimental step).</p>', "\n", sep='') + cat( '<p>This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is, together with optional calling afterwards, a more experimental type of analysis).</p>', "\n", sep='') plots_html <- '' colspan <- 1 @@ -468,6 +534,7 @@ html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('<img src="',copy_img,'" alt="', bam_file, '" width="', width_t, '" height="', height_t, '">', sep='') ) html_copy_img <- htmlLink( path=copy_img, paste('<img id="', copy_img,'" src="',copy_img,'" alt="',bam_file, '" width="', width, '" height="', height, '">', sep='') ) html_segm_img <- '' + html_call_img <- '' html_bedGraph <- '' region_count <- '' htmlRow <- paste( htmlRow, '<td>', html_copy_thumb, '</td>' ) @@ -480,7 +547,11 @@ html_segm_img <- htmlLink( path=segm_img, paste('<img id="', segm_img,'" src="', segm_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') ) htmlRow <- paste( htmlRow, '<td>', region_count, ' (', html_bedGraph, ')</td>', sep="" ) } - plots_html <- paste( plots_html, html_copy_img, "\n", html_segm_img, "\n<br \\>\n", sep='' ) + if ( doCall ){ + call_img <- plotted_images[[ paste(binSize, bam_file, 'Called', sep="_" ) ]] + html_call_img <- htmlLink( path=call_img, paste('<img id="', call_img,'" src="', call_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') ) + } + plots_html <- paste( plots_html, html_copy_img, "\n", html_segm_img, "\n", html_call_img, "\n<br \\>\n", sep='' ) } plots_html <- paste( plots_html, "\n<hr \\>\n", sep='' ) ## add info to overview table, including small thumbnails