changeset 5:440c8d5e416d draft

Uploaded
author stef
date Mon, 06 Oct 2014 11:04:42 -0400
parents 4f5fd8b917eb
children d4d58887f008
files QDNAseq.R QDNAseq.sh QDNAseq.xml qdnaseq_test-41387ffa145f/QDNAseq.R qdnaseq_test-41387ffa145f/QDNAseq.sh qdnaseq_test-41387ffa145f/QDNAseq.xml qdnaseq_test-41387ffa145f/static/binannotation/1000kbp_binAnnotations.rds qdnaseq_test-41387ffa145f/static/binannotation/100kbp_binAnnotations.rds qdnaseq_test-41387ffa145f/static/binannotation/15kbp_binAnnotations.rds qdnaseq_test-41387ffa145f/static/binannotation/30kbp_binAnnotations.rds qdnaseq_test-41387ffa145f/static/css/QDNAseq.css qdnaseq_test-41387ffa145f/static/images/LGG150_copynumber.png qdnaseq_test-41387ffa145f/static/images/LGG150_copynumberSegmented.png qdnaseq_test-41387ffa145f/tool_dependencies.xml static/binannotation/1000kbp_binAnnotations.rds static/binannotation/100kbp_binAnnotations.rds static/binannotation/15kbp_binAnnotations.rds static/binannotation/30kbp_binAnnotations.rds static/css/QDNAseq.css static/images/LGG150_copynumber.png static/images/LGG150_copynumberSegmented.png tool_dependencies.xml
diffstat 22 files changed, 1003 insertions(+), 1001 deletions(-) [+]
line wrap: on
line diff
--- a/QDNAseq.R	Mon Oct 06 05:33:48 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,529 +0,0 @@
-#!/usr/bin/Rscript
-
-## --------------------
-## prints all arguments as msg
-## --------------------
-catMsg <- function( msg=c() ){	
-	cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
-}
-## --------------------
-## return the location of this script
-## --------------------
-getScriptPath <- function(){
-    cmd.args <- commandArgs()
-    m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE)
-    script.dir <- dirname(regmatches(cmd.args, m))
-    if( length(script.dir) == 0 ) stop("[ERR] Can't determine script dir: please call the script with Rscript\n")
-    if( length(script.dir) > 1 ) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n")
-    return(script.dir)
-}
-## --------------------
-## Some html creation functions
-## --------------------
-htmlTableRow <- function( string_array=c() ){
-	td_cells <- ''
-	for ( i in string_array ){ 
-		td_cells <- paste( td_cells, '<td>', i, '</td>', sep='' )	
-	}
-	return( paste( "<tr>", td_cells, "</tr>") )
-}
-htmlLink <- function( path, desc="LINK" ){
-	return( paste( '<a href="', path, '">', desc, "</a>", sep='') )
-}
-## --------------------
-## constructs a list with input bam file info
-## --------------------
-makeBamFileList <- function( paths, names ){	
-	tmp <- list()
-	l1 <- length(paths)
-	l2 <- length(names)
-	if ( l1 != l2 ) stop( "Unequal amount of bam-paths (", l1, ") and -names (", l2, ") in makeBamFileList!!!\n" )
-	if ( l1 == 0 ){ return(tmp) } # empty list in debug mode
-
-	for ( i in 1:length(paths) ){
-		path <- paths[i]
-		name <- names[i]
-		file <- basename(path)
-
-		tmp[[ file ]] <- name
-		tmp[[ 'all_paths' ]] <- c( tmp[[ 'all_paths' ]], path )
-		tmp[[ 'all_files' ]] <- c( tmp[[ 'all_files' ]], file )
-		tmp[[ 'all_names' ]] <- c( tmp[[ 'all_names' ]], name )
-	}
-	return( tmp )
-}
-
-## --------------------
-## copied code for extracting the regions by segment call status
-## --------------------
-fuseRegions <- function( obj, minRatio=0 ) {
-	if ( ncol(obj) > 1 ) stop('Please specify which sample...')
-
-	data <- data.frame( obj@featureData@data[,1:3], copynumber(obj), segmented(obj), check.names=FALSE, stringsAsFactors=FALSE)
-	colnames( data ) <- c( "chr", "start", "end", "log2", "segmentval" )
-	
-	fused.data <- data.frame()
-	curr.bin <- 1
-	for ( chr in unique( data$chr ) ) {
-		chr.data  <- data[ data$chr == chr, ]
-		prev.bin  <- curr.bin
-		prev.log2 <- chr.data[ 1, 'log2' ]
-		prev.segm <- chr.data[ 1, 'segmentval' ]
-		start     <- chr.data[ 1, 'start' ]
-
-		if ( nrow(chr.data) > 1) {
-			for ( i in 2:nrow(chr.data) ) {
-				curr.bin  <- curr.bin + 1
-				curr.segm <- chr.data[ i, 'segmentval']
-
-				if ( curr.segm != prev.segm ) {
-					fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
-					prev.segm <- curr.segm
-					prev.bin  <- curr.bin
-					start     <- chr.data[ i, 'start']
-				}
-			}
-			fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
-		}else{
-			fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
-		}
-	}
-	## remove regions with low amplitude
-	fused.data <- fused.data[ abs(fused.data$segmentval) >= minRatio, ]
-	fused.data
-}
-
-## DESC: takes the output of fuse.regions and outputs a txt file per sample
-outputRegionsFromList <- function ( regionsList, outputBasename, outputDir="./", binSize, storeList ){
-	if ( missing(regionsList) ) stop( 'Please provide regionsList...' )
-	if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' )
-	if ( !is.list(regionsList) ) stop( 'Input not a list...?' )
-	if ( length(regionsList) < 1 ) stop( 'List seems empty...?' )
-	if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") )
-	else dir.create( outputDir )
-
-	## have to set R output options otherwise scientific method is used at some point
-	options( "scipen"=100 )
-
-	sampleCount <- length( regionsList )
-	sampleNames <- names( regionsList )
-	bedgraphColumns <- c( 'chr', 'start', 'end', 'segmentval' )
-	
-	catMsg( c( " There are ", sampleCount, " samples found in input list") )
-
-	for ( sample in sampleNames ){		
-		catMsg( c(" Working on sample ", sample ) )
-		regionCount <- nrow( regionsList[[sample]] )
-		
-		outSampleBase   <- paste( outputBasename, '_', sample, '_', binSize, 'kbp', sep='')
-		outBedgraphFile <- paste( outSampleBase, '.bedGraph', sep="" )
-		outBedgraphPath <- paste( outputDir, '/', outBedgraphFile, sep="" )
-
-		## ---------- BEDGRAPH ----------
-		txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample," description=segmented_regions_from_QDNAseq_",binSize,"kbp\n", sep="")
-		sink( outBedgraphPath )
-			cat( txt )
-		sink()
-		write.table( regionsList[[sample]][,bedgraphColumns], outBedgraphPath, quote=F, sep="\t", row.names=F, append=T, col.names=F)
-		#outFiles[[sample]] <- c( outBedgraphFile )
-		storeList[[ paste( binSize, sample, 'bedgraph', sep="_")]] <- outBedgraphFile
-	}
-	return(storeList)
-}
-
-
-## ==================================================
-## Start of analysis
-## ==================================================
-MAIN_NAME <- '[INFO] '
-TOOL_PATH <- getScriptPath()
-CSS_FILE  <- paste( TOOL_PATH, '/static/css/QDNAseq.css', sep="" )
-DECIMALS  <- 3
-WEB_LINK  <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html'
-PURE_CSS  <- 'http://yui.yahooapis.com/pure/0.5.0/pure-min.css'
-
-catMsg( "Starting QDNAseq wrapper" )	
-catMsg( "Loading R libraries" )
-suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
-suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
-
-## only one param: the tmp config file
-cmdLineArgs <- commandArgs(TRUE)
-config      <- cmdLineArgs[1]
-
-## sourcing the config file will load all input params
-## many variables are imported via sourced "config"
-source( config )
-
-## desparate tries to make png text scale well, damn you R...!
-PLOT_RES  <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 
-PAR_SET   <- list( pch=22 )
-
-systemUser <- system("whoami",T)
-qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
-rVersion <- R.version.string
-startTime <- Sys.time()
-analysisStart <- as.character( startTime )
-catMsg( c("QDNAseq version: ", qdnaseqVersion) )
-catMsg( c( rVersion ) )
-
-## get the comma separated list of chromosomes to exclude
-excludeChrs <- unlist( strsplit( excludeChrsString, ",") )
-binSizes <- as.numeric( unlist( strsplit( binSizesString, ",") ) )
-
-
-## ------------------------
-## DEBUG
-if ( debug ){
-	catMsg( c("Analysis run by user: ", systemUser ) )
-	catMsg( c("DEBUG SessionInfo: " ) )
-	print( sessionInfo() )
-}
-## /DEBUG
-## ------------------------
-
-## prepare output dir
-if ( !file.exists( outputPath) ){
-	dir.create( outputPath )
-}
-
-## copy source config file to output dir to include it in output zip
-if ( inGalaxy ){
-	file.copy( config, paste(outputPath, 'galaxyConfigFile.R', sep='/') )	
-}
-
-## setup bam filelist for easy retrieval later
-fileList    <- makeBamFileList( bamsPaths, bamsNames )
-bamCount    <- length( fileList[[ 'all_paths' ]] )
-
-gzipOutputName <- paste( 'QDNAseqResults_', outputName, '.zip', sep='' )
-gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='')
-htmlOutputName <- 'index.html'
-htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='')
-
-plotted_images <- list() # to keep track of images for later linking
-regions <- list() # will contain the segments
-outputFiles <- list()
-
-## ------------------------
-## in case of debug just use inbuilt LGG data for speedup
-if ( debug ){ 
-	binSizes <- c(15)
-	bamsPaths  <- c( "BUILD_IN_DATA")
-	bamsNames  <- c( "LGG150")
-	fileList   <- makeBamFileList( bamsPaths, bamsNames )
-	bamCount   <- length( fileList[[ 'all_paths' ]] )
-}
-
-for ( binSize in binSizes ){
-
-	## ------------------------
-	## construct output file-names and -paths
-	## ------------------------
-	robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='')
-	robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='')
-	igvCopyNrName  <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='')
-	robjSegmntName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='')
-	regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='')
-	noiseImgName   <- paste( binSize, 'kbp_QDNAseqNoiseplot.png', sep='')
-	
-	robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='')
-	robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='')
-	robjSegmntPath <- paste( outputPath, '/', robjSegmntName, sep='')
-	robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='')
-	igvCopyNrPath  <- paste( outputPath, '/', igvCopyNrName, sep='')
-	noiseImgPath   <- paste( outputPath, '/', noiseImgName, sep='')
-
-	binAnnFile <- paste( TOOL_PATH, '/static/binannotation/', binSize, 'kbp_binAnnotations.rds', sep="" )
-	if ( file.exists(binAnnFile) ){
-		binAnnotations <- readRDS( binAnnFile )
-		catMsg( c("Using local binAnnotations file" ) )
-	}else{
-		binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType )
-	}
-
-	## in case of debug just use inbuilt LGG data for speedup
-	if ( debug ){
-		data(LGG150)
-		readCounts <- LGG150
-	}else{
-		## provide bamnames because in galaxy everyting is called "dataset_###"
-		readCounts <- binReadCounts( binAnnotations, bamfiles=fileList[[ 'all_paths' ]], bamnames=fileList[[ 'all_names' ]] )	
-	}
-
-	readCountsFiltered    <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs )
-	readCountsFiltered    <- estimateCorrection( readCountsFiltered )
-	copyNumbers           <- correctBins( readCountsFiltered )
-	copyNumbersNormalized <- normalizeBins( copyNumbers )
-	copyNumbersSmooth     <- smoothOutlierBins( copyNumbersNormalized )
-	sampleNames           <- readCountsFiltered@phenoData@data$name
-
-	## save objects to output dir
-	saveRDS( readCountsFiltered, robjReadCoPath );
-	saveRDS( copyNumbersSmooth, robjCopyNrPath );
-	exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" )
-
-	## also save objects for galaxy history output if requested
-	if ( doOutputCopynumbersIgv ){
-		#@ a bit hacky galaxy way to allow an unknown number of output files based on param selection
-		#@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
-		historyName <- paste(binSize, 'kbp-IGV', sep="")
-		igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" )
-		exportBins( copyNumbersSmooth, file=igvFile, format="igv" )
-		catMsg( c("Exported igv file to history for ", binSize, "kbp bin") )
-	}
-
-	## proceed with calling if requested
-	if ( doSegment ){
-		copyNumbersSegmented  <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD )
-		copyNumbersSegmented  <- normalizeSegmentedBins( copyNumbersSegmented )
-		cgh <- makeCgh( copyNumbersSegmented )
-		saveRDS( copyNumbersSegmented, robjSegmntPath );
-	}
-
-	## ------------------------
-	## create output files
-	## ------------------------
-	png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES );
-		par( PAR_SET )
-		noisePlot( readCountsFiltered, main=paste( "Noise Plot ", binSize, "kbp", sep=''), col="darkgreen" )
-	dev.off()
-
-	binSize <- as.character( binSize ) # to avoid R using it as array index... (*#$^@ you R!)
-	binSizeString <- paste( binSize, 'kbp', sep='')
-
-	for (i in 1:length(sampleNames) ){
-		
-		sample <- sampleNames[i]
-		usedReads  <- readCountsFiltered@phenoData@data$used.reads[i]
-		catMsg( c("Creating plots for sample: ", sample, " (", binSizeString, ")" ) )	
-
-		type <- 'CopyNumbers'
-		img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png',  sep='')
-		img_file_path <- paste( outputPath, '/', img_file, sep='' )
-
-		## COPYNUMBER PLOT
-		png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); 
-			par( PAR_SET )
-			plot( copyNumbersSmooth[ ,sample ], main=paste(sample, ": CopyNumbers", sep="") ) 
-			mtext( paste( binSizeString, " bins", sep=""), 3 )
-			abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
-		dev.off()
-		
-		plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
-		
-		if ( doSegment ){
-			type <- 'Segmented'
-			img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png',  sep='')
-			img_file_path <- paste( outputPath, '/', img_file, sep='' )
-			
-			## COPYNUMBER PLOT
-			png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); 
-				par( PAR_SET )
-				plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers and Segments", sep="") )
-				mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
-				abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
-			dev.off()
-
-			plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
-
-			catMsg( c(" Fusing regions of sample: ", sample) )
-			regions[[ sample ]] <- fuseRegions( cgh[, sample] )
-
-			region_count <- nrow( data.frame( regions[[ sample ]] ) )
-			catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) )
-			plotted_images[[ paste(binSize, sample, 'region_count', sep="_" ) ]] <- region_count
-		}
-		
-		## add USED read counts
-		plotted_images[[ paste(binSize, sample, 'usedReads', sep="_" ) ]] <- usedReads
-	}
-
-	if ( doSegment ){
-		saveRDS( regions, robjRegionPath )
-		plotted_images <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath, binSize=binSize, storeList=plotted_images )	
-	}
-}# end bin
-
-
-## ----- debug -----
-#catMsg( "done" )
-#q(status=0)
-## ---- /debug -----
-
-
-## ------------------------
-## prepare output
-## ------------------------
-catMsg( "...zipping output")
-zip_cmd <- paste( "zip -j", gzipOutputPath, paste(outputPath,'/*',sep='') ) ## -j is for removing dirs from the tree
-system( zip_cmd )
-
-## ------------------------
-## get filesizes for report
-## ------------------------
-zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' )
-endTime <- Sys.time()
-timeDiff <- format( round( endTime - startTime, 3 ) )
-analysisEnd <- as.character( endTime )
-
-## ------------------------
-## creating html output to be linked to from the middle galaxy pane
-## ------------------------
-sink( file = htmlOutputPath, type = "output" )
-		cat( "<html>\n")
-		cat( "<head>\n")
-
-			cat( "\t", '<title>QDNAseq Report | ', outputName,'</title>', "\n", sep='' )
-			cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' )
-			cat( "\t<style>\n", sep='')
-				## include CSS into html file, makes it more portable
-				cat( "\t\t", readLines( CSS_FILE ), sep="\n\t\t" )
-				#cat( "\t\th1 {color:red;}", "\n")
-			cat( "\n\t</style>\n" )
-			
-		cat( "\n</head>\n")
-		cat( "\n<body>\n")
-
-		cat( "<h1>QDNAseq Report</h1>", "\n")
-		
-		cat( '<h3 class="qdnaseq">About this analysis</h3>', "\n")
-		cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='')		
-		
-		## ------------------------
-		## table with general info
-		## ------------------------
-		cat( '<h3 class="qdnaseq">Settings</h3><p>', "\n")
-		cat( '<table class="pure-table pure-table-striped"><tbody>' )
-			cat( htmlTableRow( c( "AnalysisName", outputName ) ) )
-			cat( htmlTableRow( c( "AnalysisStart", analysisStart ) ) )
-			cat( htmlTableRow( c( "AnalysisEnd", analysisEnd ) ) )
-			cat( htmlTableRow( c( "AnalysisTime", timeDiff ) ) )
-			cat( htmlTableRow( c( "BinSizes (kbp)", paste(binSizes,collapse=", ") ) ) )
-			cat( htmlTableRow( c( "R info", rVersion ) ) )
-			cat( htmlTableRow( c( "QDNAseq info", qdnaseqVersion ) ) )
-			
-			sampleStrings <- c()
-			for ( galaxyName in fileList[[ 'all_files' ]] ){
-				sampleName <- fileList[[ galaxyName ]]
-				sampleStrings <- c( sampleStrings, paste( galaxyName, ' (', sampleName, ')', sep='' ) )
-			}
-			cat( htmlTableRow( c( "InputBams", paste( sampleStrings, collapse=", ") ) ) )
-
-		cat( "</tbody></table></p>", "\n")
-		
-		## ------------------------
-		## list with links to all output files
-		## ------------------------
-		cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n")
-		cat( '<p>This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='')
-		cat( '<table class="pure-table pure-table-striped">', "\n" )
-		cat( '<thead><th>Type</th>', as.vector( mapply( paste, "<th>", binSizes, "kbp</th>", sep="" ) ),'</thead>', "\n" )
-		cat( "<tbody>", "\n")
-			files <- list()
-			fileTypes <- c( 'ReadCounts.rds', 'CopyNumbers.rds' )
-			if ( doSegment ){ fileTypes <- c( fileTypes, 'CopyNumbersSegmented.rds') }
-
-			for ( fileType in fileTypes ){
-				fileNames <- mapply( paste, binSizes, paste( 'kbp_QDNAseq', fileType, sep=''), sep='')
-				fileLinks <- mapply( htmlLink, fileNames, paste( binSizes, "kbp", sep="" ) )
-				cat( htmlTableRow( c( fileType, fileLinks ) ) )	
-			}
-		cat( "\n</tbody></table></p>", "\n")
-
-		## ------------------------
-		## table with links to files	
-		## ------------------------
-		ratio <- PLOT_WIDTH / PLOT_HEIGHT
-		width <- 960; height <- width / ratio ## bigger img
-		width_t <- 100; height_t <- 40 ## thumb img
-
-		cat( '<h3 class="qdnaseq">Results: overview</h3><p>', "\n")
-		cat( '<p>This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is a more exprimental step).</p>', "\n", sep='')
-		plots_html <- ''
-
-		colspan <- 1
-		binHeader <- "<th>Image</th>"
-		if ( doSegment ){ # extra column with segment info
-			colspan <- 2 
-			binHeader <- "<th>Image</th><th>Segments</th>"
-		} 
-		cat( '<table class="pure-table pure-table-striped">', "\n" )
-		cat( '<thead><tr><th></th><th></th>', as.vector( mapply( paste, "<th colspan=\"", colspan,"\">", binSizes, "kbp</th>", sep="" ) ), '</tr></thead>' )
-		cat( '<thead><tr><th>Sample / File</th><th>Reads</th>', rep( binHeader, length(binSizes) ), '</tr></thead>' )
-		cat( '<tbody>' )
-
-			for ( bam_file in bamsNames ){
-				
-				usedReads <- plotted_images[[ paste(binSize, bam_file, 'usedReads', sep="_" ) ]]
-				usedReads <- format( as.integer(usedReads), digits=4, decimal.mark=".", big.mark="," )
-				htmlRow <- paste( '<tr><td>', bam_file, '</td><td>', usedReads, '</td>', sep='' )
-
-				for ( binSize in binSizes ){
-					
-					## add thumbnails to table with links to anchors on html page
-					copy_img <- plotted_images[[ paste(binSize, bam_file, 'CopyNumbers', sep="_" ) ]]
-					html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('<img src="',copy_img,'" alt="', bam_file, '" width="', width_t, '" height="', height_t, '">', sep='') )
-					html_copy_img <- htmlLink( path=copy_img, paste('<img id="', copy_img,'" src="',copy_img,'" alt="',bam_file, '" width="', width, '" height="', height, '">', sep='') )
-					html_segm_img <- ''
-					html_bedGraph <- ''
-					region_count <- ''
-					htmlRow <- paste( htmlRow, '<td>', html_copy_thumb, '</td>' )
-
-					if ( doSegment ){
-						segm_img <- plotted_images[[ paste(binSize, bam_file, 'Segmented', sep="_" ) ]]
-						region_count <- plotted_images[[ paste(binSize, bam_file, 'region_count', sep="_" ) ]]
-
-						html_bedGraph <- htmlLink( path=plotted_images[[ paste(binSize, bam_file, 'bedgraph', sep="_" ) ]], 'bedGraph' )
-						html_segm_img <- htmlLink( path=segm_img, paste('<img id="', segm_img,'" src="', segm_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') )
-						htmlRow <- paste( htmlRow, '<td>', region_count, ' (', html_bedGraph, ')</td>', sep="" )
-					}
-					plots_html <- paste( plots_html, html_copy_img, "\n", html_segm_img, "\n<br \\>\n", sep='' )	
-				}
-				plots_html <- paste( plots_html, "\n<hr \\>\n", sep='' )
-				## add info to overview table, including small thumbnails
-				htmlRow <- paste( htmlRow, '</tr>', sep='' )
-				cat( htmlRow, "\n" )				
-			}
-		cat( "</tbody></table></p>", "\n")
-		
-		## ------------------------
-		## section with various output shown
-		## ------------------------
-		cat( '<h3 class="qdnaseq">Results: Sample plots</h3><p>', "\n")
-		## now include (large) images in html page
-		cat( plots_html, "\n")
-		cat( "\n</p></body>\n")
-		cat( "\n</html>\n")
-sink()
-
-## ------------------------
-## creating main html output for galaxy history
-## ------------------------
-if ( inGalaxy ){ # dont create when running outside Galaxy
-	sink( file = outputHtml, type = "output" )
-			
-		cat( "<head>", "\n")
-			cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' )
-
-			cat( "<style>", "\n")
-				## include CSS directly into html file
-				cat( paste( "\t", '/* the css here originates from ', CSS_FILE,' */', "\n") )
-				cat( paste( "\t", readLines( CSS_FILE, n = -1)), sep="\n" )
-			cat( "</style>", "\n")
-		cat( "</head>", "\n")
-
-		cat( '<h1>QDNAseq Results (', outputName,')</h1>', "\n", sep="")
-		cat( '<p>Explore <a href="', htmlOutputName, '" class="button">the results</a> directly within galaxy</p>', "\n", sep="")
-		cat( '<p>Or download a <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" )
-
-	sink()
-}
-
-## ------------------------
-## create final zip and quit with status 0 to tell galaxy all was fine
-## ------------------------
-catMsg( "zipping all output")
-system( paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) )
-catMsg( "done" )
-q(status=0)
--- a/QDNAseq.sh	Mon Oct 06 05:33:48 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-#!/bin/sh
-
-## This script creates a config file for use with the QDNAseq R wrapper
-## For use outside Galaxy
-
-QDNASEQ_R_SCRIPT='/ccagc/home/stef/code/QDNASEQ/QDNAseq.R'
-R_LOCATION='/ccagc/lib/R/R-3.1.0/bin/'
-OUTPUT_DIR='QDNASEQ'
-SCRIPT_DIR=$(dirname $0)
-SCRIPT_NAME=$(basename $0)
-
-binSizesString='1000,100,15'
-outputName=`basename $PWD`
-doSegment='TRUE'
-debug='FALSE'
-
-usage()
-{
-cat<<EOF
-  This script creates a QDNAseq wrapper config.
-  
-  Experiment name and/or output dir are optional
-    $SCRIPT_NAME [-n myExperiment] [-o outputDir] *.bam
-
-  Debug mode for working with build in LGG150 data
-    $SCRIPT_NAME -d
-
-  OPTIONS:
-    -n [s]  provide analysis name [$outputName]
-    -o [s]  output dir [$OUTPUT_DIR]
-    -d      debug mode (only create config)
-    
-EOF
-}
-
-PARAM_COUNT=0
-if [ $# -eq 0 ] || [ "$1" == "-h" ]
-then
-	usage
-	exit 1
-else
-	while getopts "n:o:d" opt; do
-	  case $opt in
-	    n)
-	      #echo "$opt was triggered, Parameter: $OPTARG" >&2
-	      outputName=$OPTARG
-	      PARAM_COUNT=`bc <<< $PARAM_COUNT+2`
-	      ;;
-	    o)
-	      OUTPUT_DIR=$OPTARG
-	      PARAM_COUNT=`bc <<< $PARAM_COUNT+2`
-	      ;;
-	    d)
-	      debug='TRUE'
-	      PARAM_COUNT=`bc <<< $PARAM_COUNT+1`
-	      ;;
-	    \?)
-	      echo "Invalid option: -$OPTARG" >&2
-	      exit 1
-	      ;;
-	    :)
-	      echo "Option -$OPTARG requires an argument." >&2
-	      exit 1
-	      ;;
-	  esac
-	done
-fi
-
-## set param dependent variables
-printf '%s\n' "[INFO] --- QDNAseq wrapper config creator ---"
-CONFIG_FILE=$OUTPUT_DIR'/qdnaseqConfig.R'
-
-## remove the params from input string leaving only the BAMs
-shift $PARAM_COUNT
-
-## sanity checks
-if [ -e $CONFIG_FILE ]; then
-	echo "[ERR] Config file ($CONFIG_FILE) already exists, exit.."; 
-	exit 0
-fi
-if [ -d $OUTPUT_DIR ]; then
-	echo "[ERR] Output dir ($OUTPUT_DIR) already exists, exit.."; 
-	exit 0
-else
-	mkdir $OUTPUT_DIR
-fi
-
-## variables setup
-CONFIG_TXT="## ==========
-## QDNAseq pipeline config file
-## ==========
-
-## ----------
-inGalaxy <- FALSE
-binSizesString <- '$binSizesString'
-experimentType <- 'SR50'
-outputName <- '$outputName'
-
-## ----------
-outputHtml  <- 'galaxyIndex.html'
-outputId    <- NA
-newFilePath <- '$OUTPUT_DIR'
-outputPath  <- '$OUTPUT_DIR'
-doSegment   <- as.logical( $doSegment )
-debug       <- as.logical( $debug )
-undoSD      <- as.double( 1.0 )
-binAnnotations <- ''
-
-## ----------
-filterBlacklistedBins <- as.logical( 'TRUE' )
-mappabilityCutoff <- as.integer( 0 )
-undoSplits <- 'sdundo'
-doOutputCopynumbersIgv <- FALSE
-
-## ----------
-PLOT_WIDTH <- as.integer( 1440 )
-PLOT_HEIGHT <- as.integer( 720 )
-excludeChrsString <- 'X,Y'
-
-## ----------
-bamsPaths <- c()
-bamsNames <- c()
-"
-
-## create config file
-printf '%s\n' "$CONFIG_TXT" > $CONFIG_FILE
-
-## add bam files to config file
-BAMS=$@
-for bam_path in $BAMS; do
-	
-	if [ ! -e $bam_path  ]; then
-		echo "[ERR] Bam file does not exist ($bam_path), exit.."; 
-		exit 0
-	fi
-
-	bam_name=`basename "$bam_path"`
-	printf '%s\n' "bamsPaths <- c( bamsPaths, \"$bam_path\" )" >> $CONFIG_FILE
-	printf '%s\n' "bamsNames <- c( bamsNames, \"$bam_name\" )" >> $CONFIG_FILE
-	printf '%s\n' "[INFO] BAM: $bam_name"
-done
-
-## done
-printf '%s\n' "[INFO] OUTPUT DIR:  $OUTPUT_DIR"
-printf '%s\n' "[INFO] OUTPUT NAME: $outputName"
-printf '%s\n' "[INFO] CONFIG FILE can be found in $CONFIG_FILE"
-printf '%s\n' "[INFO] Run with: $R_LOCATION/Rscript $QDNASEQ_R_SCRIPT $CONFIG_FILE"
-printf '%s\n' "[INFO] Done"
\ No newline at end of file
--- a/QDNAseq.xml	Mon Oct 06 05:33:48 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,291 +0,0 @@
-<tool id="QDNAseq" name="QDNAseq" version="0.0.2" force_history_refresh="True">
-  
-  <requirements>
-    
-    <!-- R 3.1.0 dependency will be used instead when available, now default R is used, see command -->
-    <requirement type="package" version="3.1.0">R</requirement>
-    <!-- <requirement type="package" version="1.0.5">qdnaseq</requirement> -->
-    <requirement type="package" version="0.1.18">samtools</requirement>
-  </requirements>
-
-  <description>Quantitative copy number abberation detection</description>
-
-  <!-- change to /full/path/to/Rscript if required (eg /ccagc/lib/R/R-3.1.0/bin/Rscript) -->
-  <command interpreter="Rscript"> 
-    QDNAseq.R 
-    $qdnaseq_cfg
-  </command>
-
-  <stdio>
-    <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
-    <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
-    <exit_code range="1:" level="fatal" description="R script didnt finish correctly, check log" />
-  </stdio>
-  
-  <inputs>
-    
-    <!-- ==================== -->
-    <!-- General inputs -->
-    <!-- ==================== -->
-    
-    <!-- Job name: must contain non-whitespace chars -->
-    <param name="jobName" type="text" optional="false" label="Analysis/ouput name" help="Supply a name for the outputs to remind you what they contain" value="TEST">
-      <validator type="empty_field" />
-      <validator type="regex" message="This field should contain some non-whitespace character">.*\S</validator>
-    </param>
-
-    <!-- Bin Size: only certain sizes are supported by QDNAseq package -->
-    <param name="binSizes" type="select" optional="false" multiple="true" label="Select bin-sizes to use (kb)" help="Larger bin sizes provide faster analysis but lower resolution">
-      <option value="1000" selected="true">1Mb</option>
-      <option value="100" selected="true">100kb</option>
-      <option value="30">30kb</option>
-      <option value="15" selected="true">15kb</option>
-      <option value="5">5kb</option>
-      <option value="1">1kb</option>
-    </param>
-
-    <!-- Experiment type: only one type (SR50) supported now, maybe more in the future-->
-    <param name="experimentType" type="select" label="Type of sequencing data" help="Currently only single end reads of lenght 50 are supported within galaxy">
-      <option value="SR50">Single Reads of 50bp</option>
-      <!-- <option value="PE1000">PairedEnd1000</option> -->
-    </param>
-
-    <!-- ==================== -->
-    <!-- Input BAMs -->
-    <!-- ==================== -->
-    <param name="bams" type="data" multiple="true" optional="True" format="bam" label="Input BAMs" help="Select the BAM files to analyze" />
-
-    <!-- ==================== -->
-    <!-- Optional segmenting -->
-    <!-- ==================== -->
-    <param name="doSegment" type="select" label="Also perform segmentation" help="Segmentation collects bins with similar ratio into regions">
-      <option value="TRUE">yes</option>
-      <option value="FALSE">no</option>
-    </param>
-
-    <!-- ==================== -->
-    <!-- Option to use your own bin annotations file -->
-    <!-- ==================== -->
-    <conditional name="binannotations_source">
-      <param name="show" type="select" label="Bin annotations to use" help="Default bin-annotations are for GRCh37/hg19 and tuned for 50bp reads (single end)">
-        <option value="default">Default</option>
-        <option value="history">From history</option>
-      </param>
-      <when value="history">
-        <param name="binannotation_file" type="data" multiple="false" label="R data structure file (*.rds) with bin-annotations" help="If you made your own bin-annotations with the QDNAseq bioconductor package you can upload them to your history and select here" />
-      </when>
-      <when value="default">
-        <param name="binannotation_file" type="hidden" value="" />
-      </when>
-      
-    </conditional> 
-
-    <!-- ==================== -->
-    <!-- Optional advanced options -->
-    <!-- ==================== -->
-    <conditional name="advanced">
-      <param name="show" type="select" label="Use advanced options" help="Select yes to show and use filter and output options">
-        <option value="no">no</option>
-        <option value="yes">yes</option>
-      </param>
-      <when value="yes">
-        
-        <param name="copynumbers_igv" type="select" label="Also output copynumber IGV file to history">
-          <option value="FALSE">no</option>
-          <option value="TRUE">yes</option>
-        </param>
-
-        <param name="undo_splits" type="select" label="undoSplits" help="If set to sdundo, see undoSD below">
-          <option value="sdundo">sdundo</option>
-          <option value="prune">prune</option>
-          <option value="none">none</option>
-        </param>
-
-        <param name="undoSD" size="10" type="float" value="1" label="undoSD" help='The number of SDs between means to keep a split if undo.splits="sdundo".' />
-          
-        <param name="blacklist" type="select" label="Filter blacklisted bins (blacklist)" help="Will exclude all blacklisted bins in the binannotation if set">
-          <option value="TRUE">yes</option>
-          <option value="FALSE">no</option>
-        </param>
-
-        <param name="mappability" type="integer" value="0" min="0" max="100" label="Filter bins with lower mappability" help="Will exclude all bins will lower mappability than this number (0-100)" />
-      
-        <!-- ==================== -->
-        <!-- Optional graphical/plotting options -->
-        <!-- ==================== -->
-        <param name="plot_width" size="3" type="integer" value="1440" label="Width of the png image produced" />
-        <param name="plot_height" size="3" type="integer" value="720" label="Height of the png image produced" />
-        <param name="exclude_chrs" type="select" multiple="true" label="Hide these chromosomes in plots" help="Currently only standard human chromosomes supported. NOTE: other filters might also exclude chromosomes">
-          <option value="1">1</option><option value="2">2</option>
-          <option value="3">3</option><option value="4">4</option>
-          <option value="5">5</option><option value="6">6</option>
-          <option value="7">7</option><option value="8">8</option>
-          <option value="9">9</option><option value="10">10</option>
-          <option value="11">11</option><option value="12">12</option>
-          <option value="13">13</option><option value="14">14</option>
-          <option value="15">15</option><option value="16">16</option>
-          <option value="17">17</option><option value="18">18</option>
-          <option value="19">19</option><option value="20">20</option>
-          <option value="21">21</option><option value="22">22</option>
-          <option value="X" selected="true">X</option>
-          <option value="Y" selected="true">Y</option>
-        </param>
-      </when>
-
-      <!-- need to set defaults because params are passed to R regardless of conditional opened/closed -->
-      <when value="no">
-        <param name="copynumbers_igv" type="hidden" value="FALSE" />
-        <param name="undoSD" type="hidden" value="1" />
-        <param name="undo_splits" type="hidden" value="sdundo" />
-        <param name="blacklist" type="hidden" value="TRUE" />
-        <param name="mappability" type="hidden" value="0" />
-        <param name="plot_width" type="hidden" value="1440" />
-        <param name="plot_height" type="hidden" value="720" />
-        <param name="exclude_chrs" type="hidden" value="X,Y" />
-      </when>
-    </conditional>
-
-    <!-- ==================== -->
-    <!-- Option to perform a test run with built in data -->
-    <!-- ==================== -->
-    <param name="debug" type="select" label="Run with test data" help="Use inbuilt LGG150 data instead of input BAMs">
-      <option value="FALSE">no</option>
-      <option value="TRUE">yes</option>
-    </param>
-    
-  </inputs>
-  <!-- ==================== -->
-  <!-- Config file to pass params to R script -->
-  <!-- ==================== -->
-  <configfiles>
-    <configfile name="qdnaseq_cfg">
-## Desc: this file was sourced in QDNAseq R wrapper script
-##  as means to pass all galaxy params to R
-
-## -----
-## required params
-## -----
-TRUE -> inGalaxy 
-"${binSizes}" -> binSizesString
-"${experimentType}" -> experimentType
-"${jobName}" -> outputName
-
-## -----
-## extra main params
-## -----
-"${htmlFile}" -> outputHtml
-"${htmlFile.id}" -> outputId
-"${__new_file_path__}" -> newFilePath
-
-"${htmlFile.files_path}" -> outputPath
-as.logical( "${doSegment}" ) -> doSegment
-as.logical( "${debug}" ) -> debug
-
-## -----
-## own bin-annotations file options
-## -----
-"${binannotations_source.binannotation_file}" -> binAnnotations
-
-## -----
-## advanced options
-## -----
-as.double( "${advanced.undoSD}" ) -> undoSD
-as.logical( "${advanced.blacklist}" ) -> filterBlacklistedBins
-as.integer( "${advanced.mappability}" ) -> mappabilityCutoff
-"${advanced.undo_splits}" -> undoSplits
-as.logical( "${advanced.copynumbers_igv}" ) -> doOutputCopynumbersIgv
-
-## #for binSize in $binSizes}.split(",")# 
-## "${binSize}kbp_${igvCopyNumbers}" -> copyNumbersIgvDatasetFile
-## #end for
-
-## -----
-## plot options
-## -----
-as.integer( "${advanced.plot_width}" ) -> PLOT_WIDTH
-as.integer( "${advanced.plot_height}" ) -> PLOT_HEIGHT
-"${advanced.exclude_chrs}" -> excludeChrsString
-  
-## -----
-## input BAMs init
-## -----
-c() -> bamsPaths
-c() -> bamsNames
-
-#for bam in $bams# 
-c( bamsPaths, "${bam}" ) -> bamsPaths
-c( bamsNames, "${bam.name}" ) -> bamsNames
-#end for
-
-    </configfile>
-  </configfiles>
-
-  <!-- ==================== -->
-  <!-- Main output is an html based report -->
-  <!-- ==================== -->
-  <outputs>
-
-    <!-- main output is a html report -->
-    <!-- ...but there can be more outputs using the id of the htmlFile output -->
-    <data format="html" name="htmlFile" label="QDNAseq: ${jobName}" />
-
-  </outputs>
-
-  <!-- ==================== -->
-  <!-- Tests still to be done -->
-  <!-- ==================== -->
-
-  <!-- 
-  <tests>
-    <test>
-      <param name="input1" value="input1" />   
-      <param name="input2" value="input2" />   
-    </test>
-  </tests>
-  -->
-
-  <help>
-.. class:: infomark
-
-**Introduction**
-
-This tool is a wrapper for the R Bioconductor package QDNAseq_
-
-.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
-
-It determines the copy number state of human chromosomes 1 - 22 for (shallow coverage) whole genome sequencing data.
-
-For questions/remarks about the galaxy part of this tool, see contact form here_
-
-.. _here: http://www.stefs.nl/wp/contact
-
-You can **test this tool** with built-in data by selecting the option "Run with test data" and press execute.
-
------
-
-.. class:: warningmark
-
-As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq), the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (3.1.0) and bioconductor package QDNAseq (>= 1.0.5). In case the default R is not 3.1.0, also the wrapper xml must be updated to include the correct path during installation of this tool.
-
-.. class:: warningmark
-
-The input BAMs are expected to be **single end reads of 50bp length** mapped to GRCh37/hg19 genome build. Other experiment setups are currently not tested or supported within galaxy. See the documentation of QDNAseq at bioconductor on how to deal with different setups (or keep fingers crossed ;) )
-
-.. class:: warningmark
-
-Requires **internet access** for downloading bin-annotations from bitbucket and to show some styling (css) of the final report
-
------
-
-**Citation**
-
-For the underlying QDNAseq R package please cite: llari Scheinin, Daoud Sie et al. DNA copy number analysis of fresh and formalin-fixed specimens by whole-genome sequencing: improved correction of systematic biases and exclusion of problematic regions, (submitted). See also the bioconductor package_ documentation.
-
-.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
-
-.. image:: LGG150_copynumber.png
-.. image:: LGG150_copynumberSegmented.png
-
-  </help>
-
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qdnaseq_test-41387ffa145f/QDNAseq.R	Mon Oct 06 11:04:42 2014 -0400
@@ -0,0 +1,529 @@
+#!/usr/bin/Rscript
+
+## --------------------
+## prints all arguments as msg
+## --------------------
+catMsg <- function( msg=c() ){	
+	cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
+}
+## --------------------
+## return the location of this script
+## --------------------
+getScriptPath <- function(){
+    cmd.args <- commandArgs()
+    m <- regexpr("(?<=^--file=).+", cmd.args, perl=TRUE)
+    script.dir <- dirname(regmatches(cmd.args, m))
+    if( length(script.dir) == 0 ) stop("[ERR] Can't determine script dir: please call the script with Rscript\n")
+    if( length(script.dir) > 1 ) stop("[ERR] Can't determine script dir: more than one '--file' argument detected\n")
+    return(script.dir)
+}
+## --------------------
+## Some html creation functions
+## --------------------
+htmlTableRow <- function( string_array=c() ){
+	td_cells <- ''
+	for ( i in string_array ){ 
+		td_cells <- paste( td_cells, '<td>', i, '</td>', sep='' )	
+	}
+	return( paste( "<tr>", td_cells, "</tr>") )
+}
+htmlLink <- function( path, desc="LINK" ){
+	return( paste( '<a href="', path, '">', desc, "</a>", sep='') )
+}
+## --------------------
+## constructs a list with input bam file info
+## --------------------
+makeBamFileList <- function( paths, names ){	
+	tmp <- list()
+	l1 <- length(paths)
+	l2 <- length(names)
+	if ( l1 != l2 ) stop( "Unequal amount of bam-paths (", l1, ") and -names (", l2, ") in makeBamFileList!!!\n" )
+	if ( l1 == 0 ){ return(tmp) } # empty list in debug mode
+
+	for ( i in 1:length(paths) ){
+		path <- paths[i]
+		name <- names[i]
+		file <- basename(path)
+
+		tmp[[ file ]] <- name
+		tmp[[ 'all_paths' ]] <- c( tmp[[ 'all_paths' ]], path )
+		tmp[[ 'all_files' ]] <- c( tmp[[ 'all_files' ]], file )
+		tmp[[ 'all_names' ]] <- c( tmp[[ 'all_names' ]], name )
+	}
+	return( tmp )
+}
+
+## --------------------
+## copied code for extracting the regions by segment call status
+## --------------------
+fuseRegions <- function( obj, minRatio=0 ) {
+	if ( ncol(obj) > 1 ) stop('Please specify which sample...')
+
+	data <- data.frame( obj@featureData@data[,1:3], copynumber(obj), segmented(obj), check.names=FALSE, stringsAsFactors=FALSE)
+	colnames( data ) <- c( "chr", "start", "end", "log2", "segmentval" )
+	
+	fused.data <- data.frame()
+	curr.bin <- 1
+	for ( chr in unique( data$chr ) ) {
+		chr.data  <- data[ data$chr == chr, ]
+		prev.bin  <- curr.bin
+		prev.log2 <- chr.data[ 1, 'log2' ]
+		prev.segm <- chr.data[ 1, 'segmentval' ]
+		start     <- chr.data[ 1, 'start' ]
+
+		if ( nrow(chr.data) > 1) {
+			for ( i in 2:nrow(chr.data) ) {
+				curr.bin  <- curr.bin + 1
+				curr.segm <- chr.data[ i, 'segmentval']
+
+				if ( curr.segm != prev.segm ) {
+					fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
+					prev.segm <- curr.segm
+					prev.bin  <- curr.bin
+					start     <- chr.data[ i, 'start']
+				}
+			}
+			fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
+		}else{
+			fused.data <- rbind( fused.data, data.frame( chr=chr, start=start, end=chr.data[ i-1, 'end'], segmentval=round(prev.segm, digits=DECIMALS) ) )
+		}
+	}
+	## remove regions with low amplitude
+	fused.data <- fused.data[ abs(fused.data$segmentval) >= minRatio, ]
+	fused.data
+}
+
+## DESC: takes the output of fuse.regions and outputs a txt file per sample
+outputRegionsFromList <- function ( regionsList, outputBasename, outputDir="./", binSize, storeList ){
+	if ( missing(regionsList) ) stop( 'Please provide regionsList...' )
+	if ( missing(outputBasename) ) stop( 'Please provide outputBasename...' )
+	if ( !is.list(regionsList) ) stop( 'Input not a list...?' )
+	if ( length(regionsList) < 1 ) stop( 'List seems empty...?' )
+	if ( file.exists( outputDir ) ) catMsg( c(" Using dir ", outputDir, " for output") )
+	else dir.create( outputDir )
+
+	## have to set R output options otherwise scientific method is used at some point
+	options( "scipen"=100 )
+
+	sampleCount <- length( regionsList )
+	sampleNames <- names( regionsList )
+	bedgraphColumns <- c( 'chr', 'start', 'end', 'segmentval' )
+	
+	catMsg( c( " There are ", sampleCount, " samples found in input list") )
+
+	for ( sample in sampleNames ){		
+		catMsg( c(" Working on sample ", sample ) )
+		regionCount <- nrow( regionsList[[sample]] )
+		
+		outSampleBase   <- paste( outputBasename, '_', sample, '_', binSize, 'kbp', sep='')
+		outBedgraphFile <- paste( outSampleBase, '.bedGraph', sep="" )
+		outBedgraphPath <- paste( outputDir, '/', outBedgraphFile, sep="" )
+
+		## ---------- BEDGRAPH ----------
+		txt <- paste( "track type=bedGraph color=0,100,0 altColor=255,0,0 name=", sample," description=segmented_regions_from_QDNAseq_",binSize,"kbp\n", sep="")
+		sink( outBedgraphPath )
+			cat( txt )
+		sink()
+		write.table( regionsList[[sample]][,bedgraphColumns], outBedgraphPath, quote=F, sep="\t", row.names=F, append=T, col.names=F)
+		#outFiles[[sample]] <- c( outBedgraphFile )
+		storeList[[ paste( binSize, sample, 'bedgraph', sep="_")]] <- outBedgraphFile
+	}
+	return(storeList)
+}
+
+
+## ==================================================
+## Start of analysis
+## ==================================================
+MAIN_NAME <- '[INFO] '
+TOOL_PATH <- getScriptPath()
+CSS_FILE  <- paste( TOOL_PATH, '/static/css/QDNAseq.css', sep="" )
+DECIMALS  <- 3
+WEB_LINK  <- 'http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html'
+PURE_CSS  <- 'http://yui.yahooapis.com/pure/0.5.0/pure-min.css'
+
+catMsg( "Starting QDNAseq wrapper" )	
+catMsg( "Loading R libraries" )
+suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
+suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
+
+## only one param: the tmp config file
+cmdLineArgs <- commandArgs(TRUE)
+config      <- cmdLineArgs[1]
+
+## sourcing the config file will load all input params
+## many variables are imported via sourced "config"
+source( config )
+
+## desparate tries to make png text scale well, damn you R...!
+PLOT_RES  <- min( PLOT_WIDTH, PLOT_HEIGHT ) / 6.3 
+PAR_SET   <- list( pch=22 )
+
+systemUser <- system("whoami",T)
+qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
+rVersion <- R.version.string
+startTime <- Sys.time()
+analysisStart <- as.character( startTime )
+catMsg( c("QDNAseq version: ", qdnaseqVersion) )
+catMsg( c( rVersion ) )
+
+## get the comma separated list of chromosomes to exclude
+excludeChrs <- unlist( strsplit( excludeChrsString, ",") )
+binSizes <- as.numeric( unlist( strsplit( binSizesString, ",") ) )
+
+
+## ------------------------
+## DEBUG
+if ( debug ){
+	catMsg( c("Analysis run by user: ", systemUser ) )
+	catMsg( c("DEBUG SessionInfo: " ) )
+	print( sessionInfo() )
+}
+## /DEBUG
+## ------------------------
+
+## prepare output dir
+if ( !file.exists( outputPath) ){
+	dir.create( outputPath )
+}
+
+## copy source config file to output dir to include it in output zip
+if ( inGalaxy ){
+	file.copy( config, paste(outputPath, 'galaxyConfigFile.R', sep='/') )	
+}
+
+## setup bam filelist for easy retrieval later
+fileList    <- makeBamFileList( bamsPaths, bamsNames )
+bamCount    <- length( fileList[[ 'all_paths' ]] )
+
+gzipOutputName <- paste( 'QDNAseqResults_', outputName, '.zip', sep='' )
+gzipOutputPath <- paste( outputPath, '/', gzipOutputName, sep='')
+htmlOutputName <- 'index.html'
+htmlOutputPath <- paste( outputPath, '/', htmlOutputName, sep='')
+
+plotted_images <- list() # to keep track of images for later linking
+regions <- list() # will contain the segments
+outputFiles <- list()
+
+## ------------------------
+## in case of debug just use inbuilt LGG data for speedup
+if ( debug ){ 
+	binSizes <- c(15)
+	bamsPaths  <- c( "BUILD_IN_DATA")
+	bamsNames  <- c( "LGG150")
+	fileList   <- makeBamFileList( bamsPaths, bamsNames )
+	bamCount   <- length( fileList[[ 'all_paths' ]] )
+}
+
+for ( binSize in binSizes ){
+
+	## ------------------------
+	## construct output file-names and -paths
+	## ------------------------
+	robjReadCoName <- paste( binSize, 'kbp_QDNAseqReadCounts.rds', sep='')
+	robjCopyNrName <- paste( binSize, 'kbp_QDNAseqCopyNumbers.rds', sep='')
+	igvCopyNrName  <- paste( binSize, 'kbp_QDNAseqCopyNumbers.igv', sep='')
+	robjSegmntName <- paste( binSize, 'kbp_QDNAseqCopyNumbersSegmented.rds', sep='')
+	regiOutputName <- paste( binSize, 'kbp_QDNAseqRegions.rds', sep='')
+	noiseImgName   <- paste( binSize, 'kbp_QDNAseqNoiseplot.png', sep='')
+	
+	robjReadCoPath <- paste( outputPath, '/', robjReadCoName, sep='')
+	robjCopyNrPath <- paste( outputPath, '/', robjCopyNrName, sep='')
+	robjSegmntPath <- paste( outputPath, '/', robjSegmntName, sep='')
+	robjRegionPath <- paste( outputPath, '/', regiOutputName, sep='')
+	igvCopyNrPath  <- paste( outputPath, '/', igvCopyNrName, sep='')
+	noiseImgPath   <- paste( outputPath, '/', noiseImgName, sep='')
+
+	binAnnFile <- paste( TOOL_PATH, '/static/binannotation/', binSize, 'kbp_binAnnotations.rds', sep="" )
+	if ( file.exists(binAnnFile) ){
+		binAnnotations <- readRDS( binAnnFile )
+		catMsg( c("Using local binAnnotations file" ) )
+	}else{
+		binAnnotations <- getBinAnnotations( binSize=binSize, type=experimentType )
+	}
+
+	## in case of debug just use inbuilt LGG data for speedup
+	if ( debug ){
+		data(LGG150)
+		readCounts <- LGG150
+	}else{
+		## provide bamnames because in galaxy everyting is called "dataset_###"
+		readCounts <- binReadCounts( binAnnotations, bamfiles=fileList[[ 'all_paths' ]], bamnames=fileList[[ 'all_names' ]] )	
+	}
+
+	readCountsFiltered    <- applyFilters( readCounts, residual=TRUE, blacklist=filterBlacklistedBins, mappability=mappabilityCutoff, chromosomes=excludeChrs )
+	readCountsFiltered    <- estimateCorrection( readCountsFiltered )
+	copyNumbers           <- correctBins( readCountsFiltered )
+	copyNumbersNormalized <- normalizeBins( copyNumbers )
+	copyNumbersSmooth     <- smoothOutlierBins( copyNumbersNormalized )
+	sampleNames           <- readCountsFiltered@phenoData@data$name
+
+	## save objects to output dir
+	saveRDS( readCountsFiltered, robjReadCoPath );
+	saveRDS( copyNumbersSmooth, robjCopyNrPath );
+	exportBins( copyNumbersSmooth, file=igvCopyNrPath, format="igv" )
+
+	## also save objects for galaxy history output if requested
+	if ( doOutputCopynumbersIgv ){
+		#@ a bit hacky galaxy way to allow an unknown number of output files based on param selection
+		#@ see: https://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
+		historyName <- paste(binSize, 'kbp-IGV', sep="")
+		igvFile <- paste( newFilePath, "/primary_", outputId, "_", historyName, "_visible_txt", sep="" )
+		exportBins( copyNumbersSmooth, file=igvFile, format="igv" )
+		catMsg( c("Exported igv file to history for ", binSize, "kbp bin") )
+	}
+
+	## proceed with calling if requested
+	if ( doSegment ){
+		copyNumbersSegmented  <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD )
+		copyNumbersSegmented  <- normalizeSegmentedBins( copyNumbersSegmented )
+		cgh <- makeCgh( copyNumbersSegmented )
+		saveRDS( copyNumbersSegmented, robjSegmntPath );
+	}
+
+	## ------------------------
+	## create output files
+	## ------------------------
+	png( noiseImgPath, width=PLOT_HEIGHT, height=PLOT_HEIGHT, res=PLOT_RES );
+		par( PAR_SET )
+		noisePlot( readCountsFiltered, main=paste( "Noise Plot ", binSize, "kbp", sep=''), col="darkgreen" )
+	dev.off()
+
+	binSize <- as.character( binSize ) # to avoid R using it as array index... (*#$^@ you R!)
+	binSizeString <- paste( binSize, 'kbp', sep='')
+
+	for (i in 1:length(sampleNames) ){
+		
+		sample <- sampleNames[i]
+		usedReads  <- readCountsFiltered@phenoData@data$used.reads[i]
+		catMsg( c("Creating plots for sample: ", sample, " (", binSizeString, ")" ) )	
+
+		type <- 'CopyNumbers'
+		img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png',  sep='')
+		img_file_path <- paste( outputPath, '/', img_file, sep='' )
+
+		## COPYNUMBER PLOT
+		png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); 
+			par( PAR_SET )
+			plot( copyNumbersSmooth[ ,sample ], main=paste(sample, ": CopyNumbers", sep="") ) 
+			mtext( paste( binSizeString, " bins", sep=""), 3 )
+			abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+		dev.off()
+		
+		plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
+		
+		if ( doSegment ){
+			type <- 'Segmented'
+			img_file <- paste( sample, '_', binSize, 'kbp_QDNAseq', type, '.png',  sep='')
+			img_file_path <- paste( outputPath, '/', img_file, sep='' )
+			
+			## COPYNUMBER PLOT
+			png( img_file_path, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ); 
+				par( PAR_SET )
+				plot( copyNumbersSegmented[ ,sample ], main=paste(sample, ": CopyNumbers and Segments", sep="") )
+				mtext( paste( "(", binSizeString, " bins)", sep=""), 3 )
+				abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+			dev.off()
+
+			plotted_images[[ paste(binSize, sample, type, sep="_" ) ]] <- img_file
+
+			catMsg( c(" Fusing regions of sample: ", sample) )
+			regions[[ sample ]] <- fuseRegions( cgh[, sample] )
+
+			region_count <- nrow( data.frame( regions[[ sample ]] ) )
+			catMsg( c( ' sample "', sample, '" has ', region_count, " regions" ) )
+			plotted_images[[ paste(binSize, sample, 'region_count', sep="_" ) ]] <- region_count
+		}
+		
+		## add USED read counts
+		plotted_images[[ paste(binSize, sample, 'usedReads', sep="_" ) ]] <- usedReads
+	}
+
+	if ( doSegment ){
+		saveRDS( regions, robjRegionPath )
+		plotted_images <- outputRegionsFromList( regions, outputBasename=outputName, outputDir=outputPath, binSize=binSize, storeList=plotted_images )	
+	}
+}# end bin
+
+
+## ----- debug -----
+#catMsg( "done" )
+#q(status=0)
+## ---- /debug -----
+
+
+## ------------------------
+## prepare output
+## ------------------------
+catMsg( "...zipping output")
+zip_cmd <- paste( "zip -j", gzipOutputPath, paste(outputPath,'/*',sep='') ) ## -j is for removing dirs from the tree
+system( zip_cmd )
+
+## ------------------------
+## get filesizes for report
+## ------------------------
+zippedSize <- paste( round( file.info( gzipOutputPath )[["size"]] / 1e+6, digits=2 ), 'MB' )
+endTime <- Sys.time()
+timeDiff <- format( round( endTime - startTime, 3 ) )
+analysisEnd <- as.character( endTime )
+
+## ------------------------
+## creating html output to be linked to from the middle galaxy pane
+## ------------------------
+sink( file = htmlOutputPath, type = "output" )
+		cat( "<html>\n")
+		cat( "<head>\n")
+
+			cat( "\t", '<title>QDNAseq Report | ', outputName,'</title>', "\n", sep='' )
+			cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' )
+			cat( "\t<style>\n", sep='')
+				## include CSS into html file, makes it more portable
+				cat( "\t\t", readLines( CSS_FILE ), sep="\n\t\t" )
+				#cat( "\t\th1 {color:red;}", "\n")
+			cat( "\n\t</style>\n" )
+			
+		cat( "\n</head>\n")
+		cat( "\n<body>\n")
+
+		cat( "<h1>QDNAseq Report</h1>", "\n")
+		
+		cat( '<h3 class="qdnaseq">About this analysis</h3>', "\n")
+		cat( '<p>This page provides access to all results. To have a local copy of this report just download the <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep='')		
+		
+		## ------------------------
+		## table with general info
+		## ------------------------
+		cat( '<h3 class="qdnaseq">Settings</h3><p>', "\n")
+		cat( '<table class="pure-table pure-table-striped"><tbody>' )
+			cat( htmlTableRow( c( "AnalysisName", outputName ) ) )
+			cat( htmlTableRow( c( "AnalysisStart", analysisStart ) ) )
+			cat( htmlTableRow( c( "AnalysisEnd", analysisEnd ) ) )
+			cat( htmlTableRow( c( "AnalysisTime", timeDiff ) ) )
+			cat( htmlTableRow( c( "BinSizes (kbp)", paste(binSizes,collapse=", ") ) ) )
+			cat( htmlTableRow( c( "R info", rVersion ) ) )
+			cat( htmlTableRow( c( "QDNAseq info", qdnaseqVersion ) ) )
+			
+			sampleStrings <- c()
+			for ( galaxyName in fileList[[ 'all_files' ]] ){
+				sampleName <- fileList[[ galaxyName ]]
+				sampleStrings <- c( sampleStrings, paste( galaxyName, ' (', sampleName, ')', sep='' ) )
+			}
+			cat( htmlTableRow( c( "InputBams", paste( sampleStrings, collapse=", ") ) ) )
+
+		cat( "</tbody></table></p>", "\n")
+		
+		## ------------------------
+		## list with links to all output files
+		## ------------------------
+		cat( '<h3 class="qdnaseq">Output files</h3><p>', "\n")
+		cat( '<p>This table contains output files that can be used for local downstream analysis with the bioconductor QDNAseq package. For each bin-size / data-level there is a R data structure file with data of all samples. See ', htmlLink( WEB_LINK, 'the bioconductor QDNAseq documentation' ), ' for more information on how to work with these files</p>', "\n", sep='')
+		cat( '<table class="pure-table pure-table-striped">', "\n" )
+		cat( '<thead><th>Type</th>', as.vector( mapply( paste, "<th>", binSizes, "kbp</th>", sep="" ) ),'</thead>', "\n" )
+		cat( "<tbody>", "\n")
+			files <- list()
+			fileTypes <- c( 'ReadCounts.rds', 'CopyNumbers.rds' )
+			if ( doSegment ){ fileTypes <- c( fileTypes, 'CopyNumbersSegmented.rds') }
+
+			for ( fileType in fileTypes ){
+				fileNames <- mapply( paste, binSizes, paste( 'kbp_QDNAseq', fileType, sep=''), sep='')
+				fileLinks <- mapply( htmlLink, fileNames, paste( binSizes, "kbp", sep="" ) )
+				cat( htmlTableRow( c( fileType, fileLinks ) ) )	
+			}
+		cat( "\n</tbody></table></p>", "\n")
+
+		## ------------------------
+		## table with links to files	
+		## ------------------------
+		ratio <- PLOT_WIDTH / PLOT_HEIGHT
+		width <- 960; height <- width / ratio ## bigger img
+		width_t <- 100; height_t <- 40 ## thumb img
+
+		cat( '<h3 class="qdnaseq">Results: overview</h3><p>', "\n")
+		cat( '<p>This table contains the visual results of the copy number aberration analysis. You can click on an image to jump to the larger version. If segmentation was performed as well the number of segments is shown and a file with genomic regions can be downloaded (just remember to inspect the results carefully as this is a more exprimental step).</p>', "\n", sep='')
+		plots_html <- ''
+
+		colspan <- 1
+		binHeader <- "<th>Image</th>"
+		if ( doSegment ){ # extra column with segment info
+			colspan <- 2 
+			binHeader <- "<th>Image</th><th>Segments</th>"
+		} 
+		cat( '<table class="pure-table pure-table-striped">', "\n" )
+		cat( '<thead><tr><th></th><th></th>', as.vector( mapply( paste, "<th colspan=\"", colspan,"\">", binSizes, "kbp</th>", sep="" ) ), '</tr></thead>' )
+		cat( '<thead><tr><th>Sample / File</th><th>Reads</th>', rep( binHeader, length(binSizes) ), '</tr></thead>' )
+		cat( '<tbody>' )
+
+			for ( bam_file in bamsNames ){
+				
+				usedReads <- plotted_images[[ paste(binSize, bam_file, 'usedReads', sep="_" ) ]]
+				usedReads <- format( as.integer(usedReads), digits=4, decimal.mark=".", big.mark="," )
+				htmlRow <- paste( '<tr><td>', bam_file, '</td><td>', usedReads, '</td>', sep='' )
+
+				for ( binSize in binSizes ){
+					
+					## add thumbnails to table with links to anchors on html page
+					copy_img <- plotted_images[[ paste(binSize, bam_file, 'CopyNumbers', sep="_" ) ]]
+					html_copy_thumb <- htmlLink( path=paste('#', copy_img, sep=''), paste('<img src="',copy_img,'" alt="', bam_file, '" width="', width_t, '" height="', height_t, '">', sep='') )
+					html_copy_img <- htmlLink( path=copy_img, paste('<img id="', copy_img,'" src="',copy_img,'" alt="',bam_file, '" width="', width, '" height="', height, '">', sep='') )
+					html_segm_img <- ''
+					html_bedGraph <- ''
+					region_count <- ''
+					htmlRow <- paste( htmlRow, '<td>', html_copy_thumb, '</td>' )
+
+					if ( doSegment ){
+						segm_img <- plotted_images[[ paste(binSize, bam_file, 'Segmented', sep="_" ) ]]
+						region_count <- plotted_images[[ paste(binSize, bam_file, 'region_count', sep="_" ) ]]
+
+						html_bedGraph <- htmlLink( path=plotted_images[[ paste(binSize, bam_file, 'bedgraph', sep="_" ) ]], 'bedGraph' )
+						html_segm_img <- htmlLink( path=segm_img, paste('<img id="', segm_img,'" src="', segm_img,'" alt="', bam_file, '" width="', width, '" height="', height,'">', sep='') )
+						htmlRow <- paste( htmlRow, '<td>', region_count, ' (', html_bedGraph, ')</td>', sep="" )
+					}
+					plots_html <- paste( plots_html, html_copy_img, "\n", html_segm_img, "\n<br \\>\n", sep='' )	
+				}
+				plots_html <- paste( plots_html, "\n<hr \\>\n", sep='' )
+				## add info to overview table, including small thumbnails
+				htmlRow <- paste( htmlRow, '</tr>', sep='' )
+				cat( htmlRow, "\n" )				
+			}
+		cat( "</tbody></table></p>", "\n")
+		
+		## ------------------------
+		## section with various output shown
+		## ------------------------
+		cat( '<h3 class="qdnaseq">Results: Sample plots</h3><p>', "\n")
+		## now include (large) images in html page
+		cat( plots_html, "\n")
+		cat( "\n</p></body>\n")
+		cat( "\n</html>\n")
+sink()
+
+## ------------------------
+## creating main html output for galaxy history
+## ------------------------
+if ( inGalaxy ){ # dont create when running outside Galaxy
+	sink( file = outputHtml, type = "output" )
+			
+		cat( "<head>", "\n")
+			cat( "\t", '<link rel="stylesheet" href="', PURE_CSS, '">', "\n", sep='' )
+
+			cat( "<style>", "\n")
+				## include CSS directly into html file
+				cat( paste( "\t", '/* the css here originates from ', CSS_FILE,' */', "\n") )
+				cat( paste( "\t", readLines( CSS_FILE, n = -1)), sep="\n" )
+			cat( "</style>", "\n")
+		cat( "</head>", "\n")
+
+		cat( '<h1>QDNAseq Results (', outputName,')</h1>', "\n", sep="")
+		cat( '<p>Explore <a href="', htmlOutputName, '" class="button">the results</a> directly within galaxy</p>', "\n", sep="")
+		cat( '<p>Or download a <a href="', gzipOutputName, '" class="button">zipfile</a> with all output (', zippedSize, ')</p>', "\n", sep="" )
+
+	sink()
+}
+
+## ------------------------
+## create final zip and quit with status 0 to tell galaxy all was fine
+## ------------------------
+catMsg( "zipping all output")
+system( paste( "zip -j ", gzipOutputPath, paste(outputPath,'/', htmlOutputName, sep='') ) )
+catMsg( "done" )
+q(status=0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qdnaseq_test-41387ffa145f/QDNAseq.sh	Mon Oct 06 11:04:42 2014 -0400
@@ -0,0 +1,148 @@
+#!/bin/sh
+
+## This script creates a config file for use with the QDNAseq R wrapper
+## For use outside Galaxy
+
+QDNASEQ_R_SCRIPT='/ccagc/home/stef/code/QDNASEQ/QDNAseq.R'
+R_LOCATION='/ccagc/lib/R/R-3.1.0/bin/'
+OUTPUT_DIR='QDNASEQ'
+SCRIPT_DIR=$(dirname $0)
+SCRIPT_NAME=$(basename $0)
+
+binSizesString='1000,100,15'
+outputName=`basename $PWD`
+doSegment='TRUE'
+debug='FALSE'
+
+usage()
+{
+cat<<EOF
+  This script creates a QDNAseq wrapper config.
+  
+  Experiment name and/or output dir are optional
+    $SCRIPT_NAME [-n myExperiment] [-o outputDir] *.bam
+
+  Debug mode for working with build in LGG150 data
+    $SCRIPT_NAME -d
+
+  OPTIONS:
+    -n [s]  provide analysis name [$outputName]
+    -o [s]  output dir [$OUTPUT_DIR]
+    -d      debug mode (only create config)
+    
+EOF
+}
+
+PARAM_COUNT=0
+if [ $# -eq 0 ] || [ "$1" == "-h" ]
+then
+	usage
+	exit 1
+else
+	while getopts "n:o:d" opt; do
+	  case $opt in
+	    n)
+	      #echo "$opt was triggered, Parameter: $OPTARG" >&2
+	      outputName=$OPTARG
+	      PARAM_COUNT=`bc <<< $PARAM_COUNT+2`
+	      ;;
+	    o)
+	      OUTPUT_DIR=$OPTARG
+	      PARAM_COUNT=`bc <<< $PARAM_COUNT+2`
+	      ;;
+	    d)
+	      debug='TRUE'
+	      PARAM_COUNT=`bc <<< $PARAM_COUNT+1`
+	      ;;
+	    \?)
+	      echo "Invalid option: -$OPTARG" >&2
+	      exit 1
+	      ;;
+	    :)
+	      echo "Option -$OPTARG requires an argument." >&2
+	      exit 1
+	      ;;
+	  esac
+	done
+fi
+
+## set param dependent variables
+printf '%s\n' "[INFO] --- QDNAseq wrapper config creator ---"
+CONFIG_FILE=$OUTPUT_DIR'/qdnaseqConfig.R'
+
+## remove the params from input string leaving only the BAMs
+shift $PARAM_COUNT
+
+## sanity checks
+if [ -e $CONFIG_FILE ]; then
+	echo "[ERR] Config file ($CONFIG_FILE) already exists, exit.."; 
+	exit 0
+fi
+if [ -d $OUTPUT_DIR ]; then
+	echo "[ERR] Output dir ($OUTPUT_DIR) already exists, exit.."; 
+	exit 0
+else
+	mkdir $OUTPUT_DIR
+fi
+
+## variables setup
+CONFIG_TXT="## ==========
+## QDNAseq pipeline config file
+## ==========
+
+## ----------
+inGalaxy <- FALSE
+binSizesString <- '$binSizesString'
+experimentType <- 'SR50'
+outputName <- '$outputName'
+
+## ----------
+outputHtml  <- 'galaxyIndex.html'
+outputId    <- NA
+newFilePath <- '$OUTPUT_DIR'
+outputPath  <- '$OUTPUT_DIR'
+doSegment   <- as.logical( $doSegment )
+debug       <- as.logical( $debug )
+undoSD      <- as.double( 1.0 )
+binAnnotations <- ''
+
+## ----------
+filterBlacklistedBins <- as.logical( 'TRUE' )
+mappabilityCutoff <- as.integer( 0 )
+undoSplits <- 'sdundo'
+doOutputCopynumbersIgv <- FALSE
+
+## ----------
+PLOT_WIDTH <- as.integer( 1440 )
+PLOT_HEIGHT <- as.integer( 720 )
+excludeChrsString <- 'X,Y'
+
+## ----------
+bamsPaths <- c()
+bamsNames <- c()
+"
+
+## create config file
+printf '%s\n' "$CONFIG_TXT" > $CONFIG_FILE
+
+## add bam files to config file
+BAMS=$@
+for bam_path in $BAMS; do
+	
+	if [ ! -e $bam_path  ]; then
+		echo "[ERR] Bam file does not exist ($bam_path), exit.."; 
+		exit 0
+	fi
+
+	bam_name=`basename "$bam_path"`
+	printf '%s\n' "bamsPaths <- c( bamsPaths, \"$bam_path\" )" >> $CONFIG_FILE
+	printf '%s\n' "bamsNames <- c( bamsNames, \"$bam_name\" )" >> $CONFIG_FILE
+	printf '%s\n' "[INFO] BAM: $bam_name"
+done
+
+## done
+printf '%s\n' "[INFO] OUTPUT DIR:  $OUTPUT_DIR"
+printf '%s\n' "[INFO] OUTPUT NAME: $outputName"
+printf '%s\n' "[INFO] CONFIG FILE can be found in $CONFIG_FILE"
+printf '%s\n' "[INFO] Run with: $R_LOCATION/Rscript $QDNASEQ_R_SCRIPT $CONFIG_FILE"
+printf '%s\n' "[INFO] Done"
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qdnaseq_test-41387ffa145f/QDNAseq.xml	Mon Oct 06 11:04:42 2014 -0400
@@ -0,0 +1,291 @@
+<tool id="QDNAseq" name="QDNAseq" version="0.0.2" force_history_refresh="True">
+  
+  <requirements>
+    
+    <!-- R 3.1.0 dependency will be used instead when available, now default R is used, see command -->
+    <!-- <requirement type="package" version="3.1.0">R</requirement> -->
+    <!-- <requirement type="package" version="1.0.5">qdnaseq</requirement> -->
+    <requirement type="package" version="0.1.18">samtools</requirement>
+  </requirements>
+
+  <description>Quantitative copy number abberation detection</description>
+
+  <!-- change to /full/path/to/Rscript if required (eg /ccagc/lib/R/R-3.1.0/bin/Rscript) -->
+  <command interpreter="Rscript"> 
+    QDNAseq.R 
+    $qdnaseq_cfg
+  </command>
+
+  <stdio>
+    <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
+    <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
+    <exit_code range="1:" level="fatal" description="R script didnt finish correctly, check log" />
+  </stdio>
+  
+  <inputs>
+    
+    <!-- ==================== -->
+    <!-- General inputs -->
+    <!-- ==================== -->
+    
+    <!-- Job name: must contain non-whitespace chars -->
+    <param name="jobName" type="text" optional="false" label="Analysis/ouput name" help="Supply a name for the outputs to remind you what they contain" value="TEST">
+      <validator type="empty_field" />
+      <validator type="regex" message="This field should contain some non-whitespace character">.*\S</validator>
+    </param>
+
+    <!-- Bin Size: only certain sizes are supported by QDNAseq package -->
+    <param name="binSizes" type="select" optional="false" multiple="true" label="Select bin-sizes to use (kb)" help="Larger bin sizes provide faster analysis but lower resolution">
+      <option value="1000" selected="true">1Mb</option>
+      <option value="100" selected="true">100kb</option>
+      <option value="30">30kb</option>
+      <option value="15" selected="true">15kb</option>
+      <option value="5">5kb</option>
+      <option value="1">1kb</option>
+    </param>
+
+    <!-- Experiment type: only one type (SR50) supported now, maybe more in the future-->
+    <param name="experimentType" type="select" label="Type of sequencing data" help="Currently only single end reads of lenght 50 are supported within galaxy">
+      <option value="SR50">Single Reads of 50bp</option>
+      <!-- <option value="PE1000">PairedEnd1000</option> -->
+    </param>
+
+    <!-- ==================== -->
+    <!-- Input BAMs -->
+    <!-- ==================== -->
+    <param name="bams" type="data" multiple="true" optional="True" format="bam" label="Input BAMs" help="Select the BAM files to analyze" />
+
+    <!-- ==================== -->
+    <!-- Optional segmenting -->
+    <!-- ==================== -->
+    <param name="doSegment" type="select" label="Also perform segmentation" help="Segmentation collects bins with similar ratio into regions">
+      <option value="TRUE">yes</option>
+      <option value="FALSE">no</option>
+    </param>
+
+    <!-- ==================== -->
+    <!-- Option to use your own bin annotations file -->
+    <!-- ==================== -->
+    <conditional name="binannotations_source">
+      <param name="show" type="select" label="Bin annotations to use" help="Default bin-annotations are for GRCh37/hg19 and tuned for 50bp reads (single end)">
+        <option value="default">Default</option>
+        <option value="history">From history</option>
+      </param>
+      <when value="history">
+        <param name="binannotation_file" type="data" multiple="false" label="R data structure file (*.rds) with bin-annotations" help="If you made your own bin-annotations with the QDNAseq bioconductor package you can upload them to your history and select here" />
+      </when>
+      <when value="default">
+        <param name="binannotation_file" type="hidden" value="" />
+      </when>
+      
+    </conditional> 
+
+    <!-- ==================== -->
+    <!-- Optional advanced options -->
+    <!-- ==================== -->
+    <conditional name="advanced">
+      <param name="show" type="select" label="Use advanced options" help="Select yes to show and use filter and output options">
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <when value="yes">
+        
+        <param name="copynumbers_igv" type="select" label="Also output copynumber IGV file to history">
+          <option value="FALSE">no</option>
+          <option value="TRUE">yes</option>
+        </param>
+
+        <param name="undo_splits" type="select" label="undoSplits" help="If set to sdundo, see undoSD below">
+          <option value="sdundo">sdundo</option>
+          <option value="prune">prune</option>
+          <option value="none">none</option>
+        </param>
+
+        <param name="undoSD" size="10" type="float" value="1" label="undoSD" help='The number of SDs between means to keep a split if undo.splits="sdundo".' />
+          
+        <param name="blacklist" type="select" label="Filter blacklisted bins (blacklist)" help="Will exclude all blacklisted bins in the binannotation if set">
+          <option value="TRUE">yes</option>
+          <option value="FALSE">no</option>
+        </param>
+
+        <param name="mappability" type="integer" value="0" min="0" max="100" label="Filter bins with lower mappability" help="Will exclude all bins will lower mappability than this number (0-100)" />
+      
+        <!-- ==================== -->
+        <!-- Optional graphical/plotting options -->
+        <!-- ==================== -->
+        <param name="plot_width" size="3" type="integer" value="1440" label="Width of the png image produced" />
+        <param name="plot_height" size="3" type="integer" value="720" label="Height of the png image produced" />
+        <param name="exclude_chrs" type="select" multiple="true" label="Hide these chromosomes in plots" help="Currently only standard human chromosomes supported. NOTE: other filters might also exclude chromosomes">
+          <option value="1">1</option><option value="2">2</option>
+          <option value="3">3</option><option value="4">4</option>
+          <option value="5">5</option><option value="6">6</option>
+          <option value="7">7</option><option value="8">8</option>
+          <option value="9">9</option><option value="10">10</option>
+          <option value="11">11</option><option value="12">12</option>
+          <option value="13">13</option><option value="14">14</option>
+          <option value="15">15</option><option value="16">16</option>
+          <option value="17">17</option><option value="18">18</option>
+          <option value="19">19</option><option value="20">20</option>
+          <option value="21">21</option><option value="22">22</option>
+          <option value="X" selected="true">X</option>
+          <option value="Y" selected="true">Y</option>
+        </param>
+      </when>
+
+      <!-- need to set defaults because params are passed to R regardless of conditional opened/closed -->
+      <when value="no">
+        <param name="copynumbers_igv" type="hidden" value="FALSE" />
+        <param name="undoSD" type="hidden" value="1" />
+        <param name="undo_splits" type="hidden" value="sdundo" />
+        <param name="blacklist" type="hidden" value="TRUE" />
+        <param name="mappability" type="hidden" value="0" />
+        <param name="plot_width" type="hidden" value="1440" />
+        <param name="plot_height" type="hidden" value="720" />
+        <param name="exclude_chrs" type="hidden" value="X,Y" />
+      </when>
+    </conditional>
+
+    <!-- ==================== -->
+    <!-- Option to perform a test run with built in data -->
+    <!-- ==================== -->
+    <param name="debug" type="select" label="Run with test data" help="Use inbuilt LGG150 data instead of input BAMs">
+      <option value="FALSE">no</option>
+      <option value="TRUE">yes</option>
+    </param>
+    
+  </inputs>
+  <!-- ==================== -->
+  <!-- Config file to pass params to R script -->
+  <!-- ==================== -->
+  <configfiles>
+    <configfile name="qdnaseq_cfg">
+## Desc: this file was sourced in QDNAseq R wrapper script
+##  as means to pass all galaxy params to R
+
+## -----
+## required params
+## -----
+TRUE -> inGalaxy 
+"${binSizes}" -> binSizesString
+"${experimentType}" -> experimentType
+"${jobName}" -> outputName
+
+## -----
+## extra main params
+## -----
+"${htmlFile}" -> outputHtml
+"${htmlFile.id}" -> outputId
+"${__new_file_path__}" -> newFilePath
+
+"${htmlFile.files_path}" -> outputPath
+as.logical( "${doSegment}" ) -> doSegment
+as.logical( "${debug}" ) -> debug
+
+## -----
+## own bin-annotations file options
+## -----
+"${binannotations_source.binannotation_file}" -> binAnnotations
+
+## -----
+## advanced options
+## -----
+as.double( "${advanced.undoSD}" ) -> undoSD
+as.logical( "${advanced.blacklist}" ) -> filterBlacklistedBins
+as.integer( "${advanced.mappability}" ) -> mappabilityCutoff
+"${advanced.undo_splits}" -> undoSplits
+as.logical( "${advanced.copynumbers_igv}" ) -> doOutputCopynumbersIgv
+
+## #for binSize in $binSizes}.split(",")# 
+## "${binSize}kbp_${igvCopyNumbers}" -> copyNumbersIgvDatasetFile
+## #end for
+
+## -----
+## plot options
+## -----
+as.integer( "${advanced.plot_width}" ) -> PLOT_WIDTH
+as.integer( "${advanced.plot_height}" ) -> PLOT_HEIGHT
+"${advanced.exclude_chrs}" -> excludeChrsString
+  
+## -----
+## input BAMs init
+## -----
+c() -> bamsPaths
+c() -> bamsNames
+
+#for bam in $bams# 
+c( bamsPaths, "${bam}" ) -> bamsPaths
+c( bamsNames, "${bam.name}" ) -> bamsNames
+#end for
+
+    </configfile>
+  </configfiles>
+
+  <!-- ==================== -->
+  <!-- Main output is an html based report -->
+  <!-- ==================== -->
+  <outputs>
+
+    <!-- main output is a html report -->
+    <!-- ...but there can be more outputs using the id of the htmlFile output -->
+    <data format="html" name="htmlFile" label="QDNAseq: ${jobName}" />
+
+  </outputs>
+
+  <!-- ==================== -->
+  <!-- Tests still to be done -->
+  <!-- ==================== -->
+
+  <!-- 
+  <tests>
+    <test>
+      <param name="input1" value="input1" />   
+      <param name="input2" value="input2" />   
+    </test>
+  </tests>
+  -->
+
+  <help>
+.. class:: infomark
+
+**Introduction**
+
+This tool is a wrapper for the R Bioconductor package QDNAseq_
+
+.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+It determines the copy number state of human chromosomes 1 - 22 for (shallow coverage) whole genome sequencing data.
+
+For questions/remarks about the galaxy part of this tool, see contact form here_
+
+.. _here: http://www.stefs.nl/wp/contact
+
+You can **test this tool** with built-in data by selecting the option "Run with test data" and press execute.
+
+-----
+
+.. class:: warningmark
+
+As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq), the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (3.1.0) and bioconductor package QDNAseq (>= 1.0.5). In case the default R is not 3.1.0, also the wrapper xml must be updated to include the correct path during installation of this tool.
+
+.. class:: warningmark
+
+The input BAMs are expected to be **single end reads of 50bp length** mapped to GRCh37/hg19 genome build. Other experiment setups are currently not tested or supported within galaxy. See the documentation of QDNAseq at bioconductor on how to deal with different setups (or keep fingers crossed ;) )
+
+.. class:: warningmark
+
+Requires **internet access** for downloading bin-annotations from bitbucket and to show some styling (css) of the final report
+
+-----
+
+**Citation**
+
+For the underlying QDNAseq R package please cite: llari Scheinin, Daoud Sie et al. DNA copy number analysis of fresh and formalin-fixed specimens by whole-genome sequencing: improved correction of systematic biases and exclusion of problematic regions, (submitted). See also the bioconductor package_ documentation.
+
+.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+.. image:: LGG150_copynumber.png
+.. image:: LGG150_copynumberSegmented.png
+
+  </help>
+
+</tool>
Binary file qdnaseq_test-41387ffa145f/static/binannotation/1000kbp_binAnnotations.rds has changed
Binary file qdnaseq_test-41387ffa145f/static/binannotation/100kbp_binAnnotations.rds has changed
Binary file qdnaseq_test-41387ffa145f/static/binannotation/15kbp_binAnnotations.rds has changed
Binary file qdnaseq_test-41387ffa145f/static/binannotation/30kbp_binAnnotations.rds has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qdnaseq_test-41387ffa145f/static/css/QDNAseq.css	Mon Oct 06 11:04:42 2014 -0400
@@ -0,0 +1,6 @@
+/* --- QDNAseq CSS --- */
+body{ padding: 0px 20px; }
+pre { padding: 0px 25px; }
+h3.qdnaseq{ color: #355681;	border-bottom: 1px solid #355681;}
+.marker { background: yellow; color: red;}
+.button { background: rgb(28, 184, 65); color: white; border-radius: 4px; padding: 4px 10px }
Binary file qdnaseq_test-41387ffa145f/static/images/LGG150_copynumber.png has changed
Binary file qdnaseq_test-41387ffa145f/static/images/LGG150_copynumberSegmented.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qdnaseq_test-41387ffa145f/tool_dependencies.xml	Mon Oct 06 11:04:42 2014 -0400
@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    
+    <package name="samtools" version="0.1.18">
+        <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" prior_installation_required="TRUE" />
+    </package>
+
+    <!-- As soon as R 3_1_0 package works good, all dependencies will be set via "package_qdnaseq_1_0_5" -->
+    <!-- Currently these have to be manually installed by installing Bioconductor package "QDNAseq" -->
+    
+    <!--
+    <package name="R" version="3.1.0">
+        <repository changeset_revision="a6cc7706ea14" name="package_r_3_1_0" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" prior_installation_required="TRUE" />
+    </package>
+    -->
+
+    <!--
+    <package name="qdnaseq" version="1.0.5">
+        <repository name="package_qdnaseq_1_0_5" owner="stef"/>
+    </package>
+    -->
+
+    <!--
+    <action type="set_environment">
+        <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
+    </action>
+    -->
+    
+</tool_dependency>
Binary file static/binannotation/1000kbp_binAnnotations.rds has changed
Binary file static/binannotation/100kbp_binAnnotations.rds has changed
Binary file static/binannotation/15kbp_binAnnotations.rds has changed
Binary file static/binannotation/30kbp_binAnnotations.rds has changed
--- a/static/css/QDNAseq.css	Mon Oct 06 05:33:48 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-/* --- QDNAseq CSS --- */
-body{ padding: 0px 20px; }
-pre { padding: 0px 25px; }
-h3.qdnaseq{ color: #355681;	border-bottom: 1px solid #355681;}
-.marker { background: yellow; color: red;}
-.button { background: rgb(28, 184, 65); color: white; border-radius: 4px; padding: 4px 10px }
Binary file static/images/LGG150_copynumber.png has changed
Binary file static/images/LGG150_copynumberSegmented.png has changed
--- a/tool_dependencies.xml	Mon Oct 06 05:33:48 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    
-    <package name="samtools" version="0.1.18">
-        <repository changeset_revision="c0f72bdba484" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="TRUE" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-
-    <!-- As soon as R 3_1_0 package works good, all dependencies will be set via "package_qdnaseq_1_0_5" -->
-    <!-- Currently these have to be manually installed by installing Bioconductor package "QDNAseq" -->
-    
-    <package name="R" version="3.1.0">
-        <repository changeset_revision="a6cc7706ea14" name="package_r_3_1_0" owner="iuc" prior_installation_required="TRUE" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-    </package>
-    
-    <!--
-    <package name="qdnaseq" version="1.0.5">
-        <repository name="package_qdnaseq_1_0_5" owner="stef"/>
-    </package>
-    -->
-
-    <!--
-    <action type="set_environment">
-        <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
-    </action>
-    -->
-    
-</tool_dependency>