# HG changeset patch # User stef # Date 1403195736 14400 # Node ID 4351c77152751ca2e49e9b3b6ed7ca9982aa39be # Parent 2489b20dc7abfc736a3f6ed859b24b5f4c2f83f7 Uploaded diff -r 2489b20dc7ab -r 4351c7715275 QDNAseq.R --- a/QDNAseq.R Thu Jun 19 12:35:21 2014 -0400 +++ b/QDNAseq.R Thu Jun 19 12:35:36 2014 -0400 @@ -13,8 +13,8 @@ cmd.args <- commandArgs() m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE) script.dir <- dirname(regmatches(cmd.args, m)) - if(length(script.dir) == 0) stop("[ERR] Can't determine script dir: please call the script with Rscript\n") - if(length(script.dir) > 1) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n") + if( length(script.dir) == 0 ) stop("[ERR] Can't determine script dir: please call the script with Rscript\n") + if( length(script.dir) > 1 ) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n") return(script.dir) } ## -------------------- @@ -37,7 +37,7 @@ tmp <- list() l1 <- length(paths) l2 <- length(names) - if ( l1 != l2 ) stop( "Unequal amount of bam-paths (",l1,") and -names (",l2,") in makeBamFileList!!!\n" ) + if ( l1 != l2 ) stop( "Unequal amount of bam-paths (", l1, ") and -names (", l2, ") in makeBamFileList!!!\n" ) for ( i in 1:length(paths) ){ path <- paths[i] name <- names[i] @@ -82,7 +82,7 @@ fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], call=prev.call, segmentval=round(prev.segm, digits=DECIMALS) ) ) if ( prev.call != 0 ){ - cat( MAIN_NAME, " ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")\n", sep="" ) + catMsg( c(" ...found called/segmented region (", chr, ':', start, ' call=', prev.call, ' segment=', prev.segm, ")" ) ) } prev.call <- curr.call prev.segm <- curr.segm @@ -107,7 +107,7 @@ if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' ) if ( !is.list(regionsList) ) stop( 'Input not a list...?' ) if ( length(regionsList) < 1 ) stop( 'List seems empty...?' ) - if ( file.exists( outputDir ) ) cat( MAIN_NAME, " Using dir ", outputDir, " for output\n", sep="") + if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") ) else dir.create( outputDir ) outFiles <- list() @@ -118,10 +118,10 @@ sampleNames <- names( regionsList ) bedgraphColumns <- c( 'chr', 'start', 'end', 'segmentval' ) - cat( MAIN_NAME, " There are ", sampleCount, " samples found in input list...\n", sep="") + catMsg( c( " There are ", sampleCount, " samples found in input list") ) for ( sample in sampleNames ){ - cat( MAIN_NAME, " Working on sample ", sample, "\n", sep="") + catMsg( c(" Working on sample ", sample ) ) regionCount <- nrow( regionsList[[sample]] ) outSampleBase <- paste( outputBasename, '_', sample, '_QDNAseqRegions', sep='') @@ -148,31 +148,11 @@ outFiles } -#printIgvFile <- function( dat, filename ){ -# -# if ( 'calls' %in% assayDataElementNames(dat) ) { -# #output <- paste(output, '-called.igv', sep="") -# cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) -# df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), calls(dat), check.names=FALSE, stringsAsFactors=FALSE) -# }else { -# #output <- paste(output, '-normalized.igv', sep="") -# cat('#type=COPY_NUMBER\n#track coords=1\n', file=filename) -# df <- data.frame(chromosome=as.character(chromosomes(dat)), start=bpstart(dat), end=bpend(dat), feature=featureNames(dat), round(copynumber(dat), digits=2), check.names=FALSE, stringsAsFactors=FALSE) -# } -# -# df$chromosome[df$chromosome == '23'] <- 'X' -# df$chromosome[df$chromosome == '24'] <- 'Y' -# df$chromosome[df$chromosome == '25'] <- 'MT' -# #return( df ) -# write.table( df, file=filename, append=TRUE, quote=FALSE, sep='\t', row.names=FALSE ) -#} - - ## ================================================== ## Start of analysis ## ================================================== +MAIN_NAME <- '[INFO] ' TOOL_PATH <- getScriptPath() -MAIN_NAME <- '[INFO] ' CSS_FILE <- paste( TOOL_PATH, '/QDNAseq.css', sep="" ) DECIMALS <- 3 WEB_LINK <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html' @@ -182,21 +162,22 @@ suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) ) suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) ) -systemUser <- system("whoami",T) -qdnaseqVersion <- packageDescription( "QDNAseq" )$Version -catMsg( c("Analysis run with user: ", systemUser ) ) -catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) ) - ## only one param: the tmp config file cmdLineArgs <- commandArgs(TRUE) config <- cmdLineArgs[1] ## sourcing the config file will load all input params +## many variables are created in sourced "config" source( config ) -## if call output requested, set doCall such that we will segment and call +PLOT_RES <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 # desparate try to make png text scale well, damn you R...! if ( doOutputCallsRds == TRUE ){ doCall <- TRUE } +systemUser <- system("whoami",T) +qdnaseqVersion <- packageDescription( "QDNAseq" )$Version +catMsg( c("Analysis run with user: ", systemUser ) ) +catMsg( c("QDNAseq version loaded: ", qdnaseqVersion) ) + ## get the comma separated list of chromosomes to exclude excludeChrs <- unlist( strsplit( excludeChrsString, ",") ) @@ -210,16 +191,12 @@ ## ------------------------ ## setup bam filelist for easy retrieval later -#catMsg( "Setting up input bam list" ) -#cat( bamsPaths, "\n" ) -#catMsg( "Namews" ) -#cat( bamsNames, "\n" ) fileList <- makeBamFileList( bamsPaths, bamsNames ) bamCount <- length( fileList[[ 'all_paths' ]] ) -## help msg still needs work! +## help msg for running script separate still needs work! #if ( length(cmdLineArgs) == 0 || cmdLineArgs[1] == "-h" || cmdLineArgs[1] == "--help"){ -# cat( paste( MAIN_NAME, "Usage: ", params_help, sep=''), "\n" ) +# catMsg( c( "Usage: ", paste(params_help,sep=",") ) ) # quit( save = 'no', status=0, runLast=F ) #} @@ -242,15 +219,20 @@ regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='') igvCalledName <- paste( binSize, 'kbp_QDNAseq-calls.igv', sep='') igvCopyNrName <- paste( binSize, 'kbp_QDNAseq-normalized.igv', sep='') +noiseImgName <- paste( 'QDNAseqNoisePlot.png', sep='') +freqImgName <- paste( 'QDNAseqFrequencyPlot.png', sep='') -gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep="") -htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep="") -robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep="") -robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep="") -robjCalledPath <- paste( outputPath, '/', robjCalledName, sep="") -robjRegionPath <- paste( outputPath, '/', regiOutputName, sep="") -igvCalledPath <- paste( outputPath, '/', igvCalledName, sep="") -igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep="") + +gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='') +htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='') +robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='') +robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='') +robjCalledPath <- paste( outputPath, '/', robjCalledName, sep='') +robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='') +igvCalledPath <- paste( outputPath, '/', igvCalledName, sep='') +igvCopyNrPath <- paste( outputPath, '/', igvCopyNrName, sep='') +noiseImgPath <- paste( outputPath, '/', noiseImgName, sep='' ) +freqImgPath <- paste( outputPath, '/', freqImgName, sep='' ) ## ------------------------ @@ -282,9 +264,9 @@ sampleNames <- readCountsFiltered@phenoData@data$name ## save objects to output dir -saveRDS( readCounts, robjReadCoPath ); +saveRDS( readCountsFiltered, robjReadCoPath ); saveRDS( copyNumbersSmooth, robjCopyNrPath ); -exportBins(copyNumbersSmooth, file=igvCopyNrPath, format="igv") +exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" ) ## also save objects for galaxy history output if requested if ( doOutputReadcountsRds ){ @@ -305,6 +287,7 @@ saveRDS( copyNumbersCalled, calledSegmentsDatasetFile ); } exportBins( copyNumbersCalled, file=igvCalledPath, format="igv") + } @@ -315,12 +298,17 @@ plotted_images <- list() # to keep track of images for later linking regions <- list() # will contain the (called) segments -noise_img_file <- paste( binSize, 'kbp_QDNAseqNoisePlot.png', sep='') -noise_img_file_path <- paste( outputPath, '/', noise_img_file, sep='' ) -png( noise_img_file_path, width=480, height=480 ); - noisePlot( readCountsFiltered ) +png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES ); + noisePlot( readCountsFiltered, col="darkgreen" ) dev.off() +if ( doCall ){ + png( freqImgPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + frequencyPlot( copyNumbersCalled ) + dev.off() +} + + for (i in 1:length(sampleNames) ){ #for (sample in sampleNames(copyNumbersSmooth) ){ sample <- sampleNames[i] @@ -330,22 +318,24 @@ type <- 'CopyNumbers' img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT ); plot( copyNumbersSmooth[ ,sample ] ); dev.off() + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); + plot( copyNumbersSmooth[ ,sample ] ); + dev.off() plotted_images[[ sample ]][[ type ]] <- img_file if ( doCall ){ type <- 'Called' img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png', sep='') img_file_path <- paste( outputPath, '/', img_file, sep='' ) - png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT ); + png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); plot( copyNumbersCalled[ ,sample ] ); dev.off() plotted_images[[ sample ]][[ type ]] <- img_file - cat( MAIN_NAME, " Fusing regions of sample: ", sample, "\n", sep="") + catMsg( c(" Fusing regions of sample: ", sample) ) regions[[ sample ]] <- fuse.regions_test( cgh[, sample] ) region_count <- nrow( data.frame( regions[[ sample ]] ) ) - cat( MAIN_NAME, ' sample "', sample, '" has ', region_count, " regions\n", sep="" ) + catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) ) plotted_images[[ sample ]][[ 'region_count' ]] <- region_count } @@ -361,7 +351,7 @@ ## ------------------------ ## prepare output ## ------------------------ -cat( MAIN_NAME, "...zipping output\n") +catMsg( "...zipping output") zip_cmd <- paste( "zip -j", gzipOutputPath, paste(outputPath,'/*',sep='') ) ## -j is for removing dirs from the tree system( zip_cmd ) @@ -385,7 +375,6 @@ cat( "\n") cat( "
\n") #cat( '', "\n" ) - #cat( '', cat( "\t", '', "\n" ) cat( "\t", "\n") cat( "", "\n") - cat( 'Explore the results directly within galaxy
', "\n", sep="") cat( 'Or download a zipfile with all output (', zippedSize, ')
', "\n", sep="" ) @@ -544,9 +541,10 @@ sink() - -cat( MAIN_NAME, "...zipping output\n") -zip_cmd <- paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) ## -j is for removing dirs from the tree -system( zip_cmd ) -cat( MAIN_NAME, "done...\n", sep="") +## ------------------------ +## create final zip and quit with status 0 to tell galaxy all was fine +## ------------------------ +catMsg( "zipping all output") +system( paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) ) +catMsg( "done" ) q(status=0)