Mercurial > repos > stef > qdnaseq
changeset 42:4351c7715275 draft
Uploaded
author | stef |
---|---|
date | Thu, 19 Jun 2014 12:35:36 -0400 |
parents | 2489b20dc7ab |
children | 327b8830d49f |
files | QDNAseq.R |
diffstat | 1 files changed, 77 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/QDNAseq.R Thu Jun 19 12:35:21 2014 -0400 +++ b/QDNAseq.R Thu Jun 19 12:35:36 2014 -0400 @@ -13,8 +13,8 @@ cmd.args <- commandArgs() m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE) script.dir <- dirname(regmatches(cmd.args, m)) - if(length(script.dir) == 0) stop("[ERR] Can't determine script dir: please call the script with Rscript\n") - if(length(script.dir) > 1) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n") + if( length(script.dir) == 0 ) stop("[ERR] Can't determine script dir: please call the script with Rscript\n") + if( length(script.dir) > 1 ) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n") return(script.dir) } ## -------------------- @@ -37,7 +37,7 @@ tmp <- list() l1 <- length(paths) l2 <- length(names) - if ( l1 != l2 ) stop( "Unequal amount of bam-paths (",l1,") and -names (",l2,") in makeBamFileList!!!\n" ) + if ( l1 != l2 ) stop( "Unequal amount of bam-paths (", l1, ") and -names (", l2, ") in makeBamFileList!!!\n" ) for ( i in 1:length(paths) ){ path <- paths[i] name <- names[i] @@ -82,7 +82,7 @@ fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) if ( prev.call != 0 ){ - cat( MAIN_NAME, " ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")\n", sep="" ) + catMsg( c(" ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")" ) ) } prev.call <- curr.call prev.segm <- curr.segm @@ -107,7 +107,7 @@ if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' ) if ( !is.list(regionsList) ) stop( 'Input not a list...?' ) if ( length(regionsList) < 1 ) stop( 'List seems empty...?' ) - if ( file.exists( outputDir ) ) cat( MAIN_NAME, " Using dir ", outputDir, " for output\n", sep="") + if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") ) else dir.create( outputDir ) outFiles <- list() @@ -118,10 +118,10 @@ sampleNames <- names( regionsList ) bedgraphColumns <- c( 'chr', 'start', 'end', 'segmentval' ) - cat( MAIN_NAME, " There are ", sampleCount, " samples found in input list...\n", sep="") + catMsg( c( " There are ", sampleCount, " samples found in input list") ) for ( sample in sampleNames ){ - cat( MAIN_NAME, " Working on sample ", sample, "\n", sep="") + catMsg( c(" Working on sample ", sample ) ) regionCount <- nrow( regionsList[[sample]] ) outSampleBase <- paste( outputBasename, '_', sample, '_QDNAseqRegions', sep='') @@ -148,31 +148,11 @@ outFiles } -#printIgvFile <- function( dat, filename ){ -# -# if ( 'calls' %in% assayDataElementNames(dat) ) { -# #output <- paste(output, '-called.igv', sep="") -# cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) -# df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), calls(dat), check.names=FALSE, stringsAsFactors=FALSE) -# }else { -# #output <- paste(output, '-normalized.igv', sep="") -# cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) -# df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), round(copynumber(dat), digits=2), check.names=FALSE, stringsAsFactors=FALSE) -# } -# -# df$chromosome[df$chromosome == '23'] <- 'X' -# df$chromosome[df$chromosome == '24'] <- 'Y' -# df$chromosome[df$chromosome == '25'] <- 'MT' -# #return( df ) -# write.table( df, file=filename, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE ) -#} - - ## ================================================== ## Start of analysis ## ================================================== +MAIN_NAME <- '[INFO] ' TOOL_PATH <- getScriptPath() -MAIN_NAME <- '[INFO] ' CSS_FILE <- paste( TOOL_PATH, '/QDNAseq.css', sep="" ) DECIMALS <- 3 WEB_LINK <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html' @@ -182,21 +162,22 @@ suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) ) suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) ) -systemUser <- system("whoami",T) -qdnaseqVersion <- packageDescription( "QDNAseq" )$Version -catMsg( c("Analysis run with user: ", systemUser ) ) -catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) ) - ## only one param: the tmp config file cmdLineArgs <- commandArgs(TRUE) config <- cmdLineArgs[1] ## sourcing the config file will load all input params +## many variables are created in sourced "config" source( config ) -## if call output requested, set doCall such that we will segment and call +PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 # desparate try to make png text scale well, damn you R...! if ( doOutputCallsRds == TRUE ){ doCall <- TRUE } +systemUser <- system("whoami",T) +qdnaseqVersion <- packageDescription( "QDNAseq" )$Version +catMsg( c("Analysis run with user: ", systemUser ) ) +catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) ) + ## get the comma separated list of chromosomes to exclude excludeChrs <- unlist( strsplit( excludeChrsString, ",") ) @@ -210,16 +191,12 @@ ## ------------------------ ## setup bam filelist for easy retrieval later -#catMsg( "Setting up input bam list" ) -#cat( bamsPaths, "\n" ) -#catMsg( "Namews" ) -#cat( bamsNames, "\n" ) fileList <- makeBamFileList( bamsPaths, bamsNames ) bamCount <- length( fileList[[ 'all_paths' ]] ) -## help msg still needs work! +## help msg for running script separate still needs work! #if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ -# cat( paste( MAIN_NAME, "Usage: ", params_help, sep=''), "\n" ) +# catMsg( c( "Usage: ", paste(params_help,sep=",") ) ) # quit( save = 'no', status=0, runLast=F ) #} @@ -242,15 +219,20 @@ regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') igvCalledName <- paste( binSize, 'kbp_QDNAseq-calls.igv', sep='') igvCopyNrName <- paste( binSize, 'kbp_QDNAseq-normalized.igv', sep='') +noiseImgName <- paste( 'QDNAseqNoisePlot.png', sep='') +freqImgName <- paste( 'QDNAseqFrequencyPlot.png', sep='') -gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep="") -htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep="") -robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep="") -robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep="") -robjCalledPath <- paste( outputPath, '/', robjCalledName, sep="") -robjRegionPath <- paste( outputPath, '/', regiOutputName, sep="") -igvCalledPath <- paste( outputPath, '/', igvCalledName, sep="") -igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep="") + +gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='') +htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='') +robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') +robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') +robjCalledPath <- paste( outputPath, '/', robjCalledName, sep='') +robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') +igvCalledPath <- paste( outputPath, '/', igvCalledName, sep='') +igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') +noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='' ) +freqImgPath <- paste( outputPath, '/', freqImgName, sep='' ) ## ------------------------ @@ -282,9 +264,9 @@ sampleNames <- readCountsFiltered@phenoData@data$name ## save objects to output dir -saveRDS( readCounts, robjReadCoPath ); +saveRDS( readCountsFiltered, robjReadCoPath ); saveRDS( copyNumbersSmooth, robjCopyNrPath ); -exportBins(copyNumbersSmooth, file=igvCopyNrPath, format="igv") +exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" ) ## also save objects for galaxy history output if requested if ( doOutputReadcountsRds ){ @@ -305,6 +287,7 @@ saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); } exportBins( copyNumbersCalled, file=igvCalledPath, format="igv") + } @@ -315,12 +298,17 @@ plotted_images <- list() # to keep track of images for later linking regions <- list() # will contain the (called) segments -noise_img_file <- paste( binSize, 'kbp_QDNAseqNoisePlot.png', sep='') -noise_img_file_path <- paste( outputPath, '/', noise_img_file, sep='' ) -png( noise_img_file_path, width=480, height=480 ); - noisePlot( readCountsFiltered ) +png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES ); + noisePlot( readCountsFiltered, col="darkgreen" ) dev.off() +if ( doCall ){ + png( freqImgPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + frequencyPlot( copyNumbersCalled ) + dev.off() +} + + for (i in 1:length(sampleNames) ){ #for (sample in sampleNames(copyNumbersSmooth) ){ sample <- sampleNames[i] @@ -330,22 +318,24 @@ type <- 'CopyNumbers' img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT ); plot( copyNumbersSmooth[ ,sample ] ); dev.off() + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + plot( copyNumbersSmooth[ ,sample ] ); + dev.off() plotted_images[[ sample ]][[ type ]] <- img_file if ( doCall ){ type <- 'Called' img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT ); + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); plot( copyNumbersCalled[ ,sample ] ); dev.off() plotted_images[[ sample ]][[ type ]] <- img_file - cat( MAIN_NAME, " Fusing regions of sample: ", sample, "\n", sep="") + catMsg( c(" Fusing regions of sample: ", sample) ) regions[[ sample ]] <- fuse.regions_test( cgh[, sample] ) region_count <- nrow( data.frame( regions[[ sample ]] ) ) - cat( MAIN_NAME, ' sample "', sample, '" has ', region_count, " regions\n", sep="" ) + catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) plotted_images[[ sample ]][[ 'region_count' ]] <- region_count } @@ -361,7 +351,7 @@ ## ------------------------ ## prepare output ## ------------------------ -cat( MAIN_NAME, "...zipping output\n") +catMsg( "...zipping output") zip_cmd <- paste( "zip -j", gzipOutputPath, paste(outputPath,'/*',sep='') ) ## -j is for removing dirs from the tree system( zip_cmd ) @@ -385,7 +375,6 @@ cat( "<html>\n") cat( "<head>\n") #cat( '<link rel="stylesheet" type="text/css" href="test.css" media="screen" />', "\n" ) - #cat( '<link rel="stylesheet" type="text/css" href="../../../../static/style/test.css" media="screen" />', cat( "\t", '<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.4.2/pure-min.css">', "\n" ) cat( "\t<style>\n") ## have to include CSS into html file, because css referencing outside own dir doesn't seem to work... @@ -401,9 +390,7 @@ cat( "<h1>QDNAseq Report</h1>", "\n") cat( '<h3 class="qdnaseq">About this analysis</h3>', "\n") - cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='') - #cat( '<a href="#" class="button-success button-xsmall pure-button">test</a>' ) - + cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='') ## ------------------------ ## table with general info @@ -413,8 +400,6 @@ cat( htmlTableRow( c( "AnalysisName", outputName ) ) ) cat( htmlTableRow( c( "AnalysisDate", 'todo' ) ) ) cat( htmlTableRow( c( "BinSize (kb)", binSize ) ) ) - #cat( htmlTableRow( c( "undoSD", undoSD ) ) ) - #cat( htmlTableRow( c( "useBlacklist", filterBlacklistedBins ) ) ) for ( galaxyName in fileList[[ 'all_files' ]] ){ sampleName <- fileList[[ galaxyName ]] @@ -431,7 +416,6 @@ cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n") cat( '<dl>', "\n" ) - #cat( '<dt>Definition term</dt>', "\n", '<dd>Definition term</dd>', "\n" ) cat( '<dt>', htmlLink( path=robjReadCoName, robjReadCoName ), '</dt>', "\n" ) cat( '<dd>QDNAseq object with read counts per bin, ', readCoSize,'</dd>', "\n" ) r_code <- paste( r_code, '<code class="code">readCounts <- readRDS(', robjReadCoName, ")</code><br />", sep="") @@ -458,6 +442,7 @@ } cat( '</dl></p>', "\n" ) + ## r_code shows example code on how to load output files #cat( r_code, "</p>\n", sep="\n") cat( '<p>See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='' ) @@ -506,36 +491,48 @@ } cat( "</tbody></table></p>", "\n") - ## add noise plot - html_noise_img <- htmlLink( path=noise_img_file, paste('<img id="', noise_img_file,'" src="',noise_img_file,'" alt="NoisePlot">', sep='') ) - plots_html <- paste( plots_html, html_noise_img, "\n<hr \\>\n", sep='' ) + ## ------------------------ + ## create the noise and frequency plots + ## ------------------------ + html_noise_img <- htmlLink( + path=noiseImgName, + paste('<img id="', noiseImgName,'" src="', noiseImgName,'" alt="NoisePlot">', sep='') + ) + html_freq_img <- htmlLink( + path=freqImgName, + paste('<img id="', freqImgName,'" src="', freqImgName,'" alt="FrequenceyPlot">', sep='') + ) + extra_plots_html <- paste( + html_noise_img, "\n<br \\>\n", + html_freq_img, "\n", sep='' + ) ## ------------------------ ## section with various output shown ## ------------------------ - cat( '<h3 class="qdnaseq">Results: plots</h3><p>', "\n") + cat( '<h3 class="qdnaseq">Results: Analysis plots</h3><p>', "\n") + cat( extra_plots_html, "\n") + cat( '<h3 class="qdnaseq">Results: Sample plots</h3><p>', "\n") cat( plots_html, "\n") cat( "\n</p></body>\n") cat( "\n</html>\n") sink() ## ------------------------ -## creating html output to be viewed in middle galaxy pane +## creating main html output for galaxy history ## ------------------------ -#sink( file = htmlOutputPath, type = "output" ) sink( file = outputHtml, type = "output" ) cat( "<head>", "\n") cat( "\t", '<link rel="stylesheet" href="http://yui.yahooapis.com/pure/0.4.2/pure-min.css">', "\n" ) cat( "<style>", "\n") - ## have to include CSS into html file, because css referencing outside own dir doesn't seem to work...makes it more portable anyway :P + ## include CSS directly into html file cat( paste( "\t", '/* the css here originates from ', CSS_FILE,' */', "\n") ) cat( paste( "\t", readLines( CSS_FILE, n = -1)), sep="\n" ) cat( "</style>", "\n") cat( "</head>", "\n") - cat( '<h1>QDNAseq Results (', outputName,')</h1>', "\n", sep="") cat( '<p>Explore <a href="', htmlOutputName, '" class="button-success button-small pure-button">the results</a> directly within galaxy</p>', "\n", sep="") cat( '<p>Or download a <a href="', gzipOutputName, '" class="button-success button-small pure-button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" ) @@ -544,9 +541,10 @@ sink() - -cat( MAIN_NAME, "...zipping output\n") -zip_cmd <- paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) ## -j is for removing dirs from the tree -system( zip_cmd ) -cat( MAIN_NAME, "done...\n", sep="") +## ------------------------ +## create final zip and quit with status 0 to tell galaxy all was fine +## ------------------------ +catMsg( "zipping all output") +system( paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) ) +catMsg( "done" ) q(status=0)