changeset 27:f89205f51e27 draft default tip

Uploaded
author stef
date Mon, 06 Jul 2015 05:41:08 -0400
parents 069289631381
children
files QDNAseq-export.R QDNAseq-export.xml QDNAseq-plot.R QDNAseq-plot.xml QDNAseq-regioning.R QDNAseq-regioning.xml QDNAseq-version.R QDNAseq.R QDNAseq.xml README.md
diffstat 10 files changed, 557 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-export.R	Mon Jul 06 05:41:08 2015 -0400
@@ -0,0 +1,62 @@
+#!/usr/bin/Rscript
+
+## --------------------
+## prints all arguments as msg
+## --------------------
+catMsg <- function( msg=c() ){	
+	cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
+}
+
+## ==================================================
+## Start of analysis
+## ==================================================
+MAIN_NAME <- '[INFO] '
+catMsg( "Starting QDNAseq-export wrapper" )
+
+## supress msg to allow R to finish with non-error msg
+catMsg( "Loading R libraries" )
+suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
+suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
+
+## only one param: the tmp config file
+cmdLineArgs <- commandArgs(TRUE)
+config      <- cmdLineArgs[1]
+
+## sourcing the config file will load all input params
+## many variables are imported via sourced "config"
+source( config ) # outputFile, outputName, outputFormat, sampleIndex, filterBins
+
+systemUser <- system("whoami",T)
+qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
+rVersion <- R.version.string
+rPath <- R.home()
+catMsg( c("QDNAseq version ", qdnaseqVersion) )
+catMsg( c( rVersion ) )
+qdnaseqObject <- readRDS( rdsFilePath )
+logTransform <- TRUE
+
+sampleNames <- sampleNames( qdnaseqObject )
+elements <- assayDataElementNames(qdnaseqObject)
+element <- dataLevel
+if (dataLevel == "segments") element <- "segmented"
+if (element == "calls") logTransform <- FALSE
+
+## sanity checks
+if ( ! element %in% elements ) stop( paste( "Data-level \"", element, "\" not present in object", sep='') ) 
+if ( outputFormat == "bed" ){
+	if ( sampleIndex == "None") stop("Bed option requires sample index")
+	if ( sampleIndex > length(sampleNames) ) stop("Chosen sample index not present in object")
+	qdnaseqObject <- qdnaseqObject[ ,sampleIndex]
+}
+
+## output
+exportBins( qdnaseqObject, 
+	file=outputFile, 
+	format=outputFormat, 
+	filter=filterBins, 
+	type=dataLevel, 
+	logTransform=logTransform 
+)
+
+## tell galaxy all seems ok
+q(status=0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-export.xml	Mon Jul 06 05:41:08 2015 -0400
@@ -0,0 +1,186 @@
+<tool id="QDNAseq-export" name="QDNAseq-export" version="0.0.1" force_history_refresh="True">
+  
+  <requirements>
+    
+    <!-- R 3.1.0 dependency will be used instead when available, now default R is used, see command -->
+    <!-- <requirement type="package" version="3.1.0">R</requirement> -->
+    <!-- <requirement type="package" version="1.2.2">qdnaseq</requirement> -->
+    
+  </requirements>
+
+  <description>Export QDNAseq data to tabular</description>
+
+  <!-- change to /full/path/to/Rscript if required (eg /ccagc/lib/R/R-3.1.0/bin/Rscript) -->
+  <command interpreter="Rscript"> 
+    QDNAseq-export.R 
+    $cfg
+  </command>
+
+  <version_command interpreter="Rscript">QDNAseq-version.R</version_command>
+
+  <stdio>
+    <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
+    <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
+    <exit_code range="1:" level="fatal" description="R script finished too early, check log" />
+  </stdio>
+  
+  <inputs>
+    
+    <!-- ==================== -->
+    <!-- General inputs -->
+    <!-- ==================== -->
+    
+    <!-- Job name: must contain non-whitespace chars -->
+    <param name="jobName" type="text" optional="false" label="Analysis/ouput name" help="Supply a name for the outputs to remind you what they contain" value="TEST">
+      <!-- <validator type="empty_field" /> -->
+      <validator type="regex" message="No whitespace characters allowed">^[^\s\\]*$</validator>
+    </param>
+
+    <!-- ==================== -->
+    <!-- Input RDS -->
+    <!-- ==================== -->
+    <param name="rdsFile" type="data" optional="False" format="rds" label="Input RDS file" help="RDS file should contain a QDNAseq R object (output of QDNAseq tool)" />
+
+    <!-- ==================== -->
+    <!-- Data level -->
+    <!-- ==================== -->
+    <param name="data_level" type="select" label="Level of data to export" help="If segmentation and/or calling has been performed the segmented or called values can be exported instead of copynumber (normalized read counts)">
+      <option value="copynumber">copynumbers</option>
+      <option value="segments">segments</option>
+      <option value="calls">calls</option>
+    </param>
+
+    <!-- ==================== -->
+    <!-- Include filtered bins or not -->
+    <!-- ==================== -->
+    <param name="filter_bins" type="select" label="Also output copynumber RDS files to history" help="Each bin has a filter status. By default the bins that were previously ignored by the analysis before are not send to the output. Set to 'include' if you want to include those as well">
+      <option value="TRUE">Exclude filtered bins</option>
+      <option value="FALSE">Include filtered bins</option>
+    </param>    	
+
+    <!-- ==================== -->
+    <!-- Output type -->
+    <!-- ==================== -->
+    <conditional name="output_format_selection">
+      <param name="output_format" type="select" label="Plot all samples in RDS object or choose one" help="All output is tabular, but depending on downstream use some formats are more handy than others">
+        <option value="tsv">TSV</option>
+        <option value="igv">IGV</option>
+        <option value="bed">BED</option>
+      </param>
+      <when value="bed">
+        <param name="sample_index" type="integer" required="True" value="1" label="sample-index (integer)" help="The object can host muliple samples while the BED option can only export one. Therefor a sample index must be chosen for this output." />
+      </when>
+      <when value="tsv">
+        <param name="sample_index" type="hidden" value="" />
+      </when>
+      <when value="igv">
+        <param name="sample_index" type="hidden" value="" />
+      </when>
+      
+    </conditional> 
+    
+  </inputs>
+
+  <!-- ==================== -->
+  <!-- Config file to pass params to R script -->
+  <!-- ==================== -->
+  <configfiles>
+    <configfile name="cfg">
+## Desc: this file is sourced in QDNAseq-export.R wrapper script
+##  as means to pass all galaxy params to R
+
+"${jobName}" -> outputName
+"${output_file}" -> outputFile
+"${data_level}" -> dataLevel
+"${output_format_selection.output_format}" -> outputFormat
+"${rdsFile}" -> rdsFilePath
+as.integer( "${output_format_selection.sample_index}" ) -> sampleIndex
+as.logical( "${filter_bins}" ) -> filterBins
+
+    </configfile>
+  </configfiles>
+
+  <!-- ==================== -->
+  <!-- One text file as output -->
+  <!-- ==================== -->
+  <outputs>
+    <data format="tabular" name="output_file" label="QDNAseq: ${jobName} export file" />
+  </outputs>
+
+  <help>
+
+**Introduction**
+
+This tool is a wrapper for the "exportBins" function of the R Bioconductor package QDNAseq_
+
+.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+-----
+
+**What it does**
+
+**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, a (optional) output file of the QDNAseq galaxy tool. **Output:** Running this export tool provides you with one output text file. When either TSV or IGV is chosen as output format, the output file contains data of all samples present in the object. When BED is chosen as output format, output contains only one sample (by default the first). **OutputDataLevel:** The output data can be of three levels. If the object contains segmented and/or call values these can be chosen instead of the default copynumber (log2 transformed normalized read counts).
+
+-----
+
+**Output examples**
+
+*Example BED output:*
+
+::
+
+ track name="SAMPLE1.bam" description="copynumber"
+ 1  6000000  7000000  1:6000001-7000000  1.293  +
+ 1  7000000  8000000  1:7000001-8000000  1.335  +
+
+*Example TSV output:*
+
+::
+
+ feature            chr start    end      SAMPLE1.bam  SAMPLE2.bam
+ 1:6000001-7000000  1	6000001  7000000  1.293	       -0.979
+ 1:7000001-8000000  1	7000001  8000000  1.335        -1.022
+
+*Example IGV output (at segmented level):*
+
+::
+
+ #type=COPY_NUMBER
+ #track coords=1
+ chr  start    end      feature            SAMPLE1.bam  SAMPLE2.bam
+ 1    6000001  7000000  1:6000001-7000000  1.314        -1.0005
+ 1    7000001  8000000  1:7000001-8000000  1.314        -1.0005
+
+*Example IGV output (at called level):*
+
+::
+
+ #type=COPY_NUMBER
+ #track coords=1
+ chr  start    end      feature            SAMPLE1.bam  SAMPLE2.bam
+ 1    6000001  7000000  1:6000001-7000000  1            -1
+ 1    7000001  8000000  1:7000001-8000000  1            -1
+
+-----
+
+.. class:: warningmark
+
+As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool.
+
+-----
+
+**Citation**
+
+For the underlying QDNAseq R package please cite: 
+Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114.
+
+See also the bioconductor package_ documentation.
+
+.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+.. image:: LGG150_copynumber.png
+.. image:: LGG150_copynumberSegmented.png
+
+  </help>
+
+</tool>
--- a/QDNAseq-plot.R	Mon Jul 06 05:38:44 2015 -0400
+++ b/QDNAseq-plot.R	Mon Jul 06 05:41:08 2015 -0400
@@ -37,13 +37,42 @@
 catMsg( c( rVersion ) )
 
 qdnaseqObject <- readRDS( rdsFilePath )
+chromosomesToPlot <- unlist( strsplit( chromosomesToPlotString, ",") )
+
+#cat( "CHROM: ", chromosomesToPlotString, "\n" )
+#cat( "REGION: ", regionToPlotString, "\n" )
+#cat( "What to plot: ", whatToPlot, "\n" )
+
 ## COPYNUMBER PLOT
 sample <- SAMPLE_INDEX
 png( outputPngPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES )
 	par( PAR_SET )
-	plot( qdnaseqObject[ ,sample ] ) 
+	ylab_text <- "log2 read counts"
+	if ( whatToPlot == 'everything' ){
+		catMsg( c( "Plotting all data in object" ) )
+		plot( qdnaseqObject[ ,sample ], ylab=ylab_text )
+		abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+	} else if( whatToPlot == 'chromosomes' ){
+		catMsg( c( "Plotting subset of chromosomes" ) )
+		fdata <- qdnaseqObject@featureData@data
+		idx_region <- which( fdata$chromosome %in% chromosomesToPlot )
+		plot( qdnaseqObject[ idx_region, sample ], ylab=ylab_text )
+		abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+	} else if( whatToPlot == 'region' ){
+		regionC <- chrName
+		regionS <- chrStart
+		regionE <- chrEnd
+		if ( regionS > regionE ) stop("Chosen start is > end")
+		catMsg( c( "Plotting genomic region (chr=", regionC, " start=", regionS, " end=", regionE, ")" ) )
+		
+		fdata <- qdnaseqObject@featureData@data
+		idx_region <- which( fdata$chromosome == regionC & fdata$start > regionS & fdata$end < regionE )
+		cat( idx_region, "\n")
+		
+		plot( qdnaseqObject[ idx_region, sample ], doCalls=FALSE, ylab=ylab_text )
+	}
 	#mtext( "plotted in galaxy", 3 )
-	abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+	
 dev.off()
 
 
--- a/QDNAseq-plot.xml	Mon Jul 06 05:38:44 2015 -0400
+++ b/QDNAseq-plot.xml	Mon Jul 06 05:41:08 2015 -0400
@@ -16,6 +16,8 @@
     $cfg
   </command>
 
+  <version_command interpreter="Rscript">QDNAseq-version.R</version_command>
+
   <stdio>
     <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
     <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
@@ -57,7 +59,57 @@
       
     </conditional> 
     -->
-    <param name="sample_index" type="integer" required="True" value="1" label="Sample-index (integer)" help="The RDS input object can contain data from multiple individuals, this number tells the script which one to plot. Plotting multiple at the same time is not supported." />
+    <param name="sample_index" type="integer" required="True" value="1" label="Sample-index (integer)" help="The RDS input object can contain data from multiple samples, this index tells the script which one to plot. Plotting multiple samples at the same time is not supported." />
+
+    <conditional name="subset_selection">
+      <param name="what_to_plot" type="select" label="What to plot" help="Instead of plotting everything in the object you can also select certain chromosomes or set a genomic region">
+        <option value="everything">Everything</option>
+        <option value="chromosomes">Selected chromosomes</option>
+        <option value="region">Genomic region</option>
+      </param>
+      <when value="everything">
+        <!-- ==================== -->
+        <param name="chr_name" type="hidden" value="NA" />
+        <param name="chr_start" type="hidden" value="NA" />
+        <param name="chr_end" type="hidden" value="NA" />
+        <param name="chromosomes" type="hidden" value="NA" />
+      </when>
+      <when value="chromosomes">
+        <!-- ==================== -->
+        <param name="chromosomes" type="select" multiple="true" optional="false" label="Select chromosomes to plot" help="To zoom in on a particular chromosome you can select one or more here">
+          <option value="1">1</option><option value="2">2</option>
+          <option value="3">3</option><option value="4">4</option>
+          <option value="5">5</option><option value="6">6</option>
+          <option value="7">7</option><option value="8">8</option>
+          <option value="9">9</option><option value="10">10</option>
+          <option value="11">11</option><option value="12">12</option>
+          <option value="13">13</option><option value="14">14</option>
+          <option value="15">15</option><option value="16">16</option>
+          <option value="17">17</option><option value="18">18</option>
+          <option value="19">19</option><option value="20">20</option>
+          <option value="21">21</option><option value="22">22</option>
+          <!--<option value="X" selected="true">X</option>-->
+          <!--<option value="Y" selected="true">Y</option>-->
+        </param>
+        <param name="chr_name" type="hidden" value="NA" />
+        <param name="chr_start" type="hidden" value="NA" />
+        <param name="chr_end" type="hidden" value="NA" />
+      </when>
+      <when value="region">
+        <!-- ==================== -->
+        <!--
+        <param name="genomic_region" type="text" optional="false" label="Genomic region" help="Supply a genomic region in format chr1:12345-23456" value="chr1:12345-23456" size="30">
+          <validator type="regex" message="No whitespace characters allowed">^[^\s\\]*$</validator>
+          <validator type="regex" message="String is not of correct format">^chr\d+\:\d+\-\d+$</validator>
+        </param>
+        -->
+        <param name="chr_name" size="2" type="integer" optional="false" value="" label="Chromosome" />
+        <param name="chr_start" size="15" type="integer" optional="false" value="" label="Start position on chromosome" />
+        <param name="chr_end" size="15" type="integer" optional="false" value="" label="End position on chromosome" />
+        <param name="chromosomes" type="hidden" value="NA" />
+      </when>
+
+    </conditional>
 
     <!-- ==================== -->
     <!-- Optional advanced options -->
@@ -98,6 +150,11 @@
 "${outputPng}" -> outputPngPath
 "${rdsFile}" -> rdsFilePath
 as.integer( "${sample_index}" ) -> SAMPLE_INDEX
+"${subset_selection.what_to_plot}" -> whatToPlot
+"${subset_selection.chromosomes}" -> chromosomesToPlotString
+as.integer( "${subset_selection.chr_name}" ) -> chrName
+as.integer( "${subset_selection.chr_start}" ) -> chrStart
+as.integer( "${subset_selection.chr_end}" ) -> chrEnd
 
 ## -----
 ## extra options
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-regioning.R	Mon Jul 06 05:41:08 2015 -0400
@@ -0,0 +1,54 @@
+#!/usr/bin/Rscript
+
+## --------------------
+## prints all arguments as msg
+## --------------------
+catMsg <- function( msg=c() ){	
+	cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
+}
+
+## ==================================================
+## Start of analysis
+## ==================================================
+MAIN_NAME <- '[INFO] '
+catMsg( "Starting QDNAseq-export wrapper" )
+
+## supress msg to allow R to finish with non-error msg
+catMsg( "Loading R libraries" )
+suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
+suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
+suppressWarnings( suppressMessages( library( CGHregions, quietly = TRUE ) ) )
+
+## only one param: the tmp config file
+cmdLineArgs <- commandArgs(TRUE)
+config      <- cmdLineArgs[1]
+
+## sourcing the config file will load all input params
+## many variables are imported via sourced "config"
+source( config ) # outputFile, outputName
+
+systemUser <- system("whoami",T)
+qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
+rVersion <- R.version.string
+rPath <- R.home()
+catMsg( c("QDNAseq version ", qdnaseqVersion) )
+catMsg( c( rVersion ) )
+
+qdnaseqObject <- readRDS( rdsFilePath )
+sampleNames <- sampleNames( qdnaseqObject )
+
+## sanity checks
+elements <- assayDataElementNames(qdnaseqObject)
+if ( ! "calls" %in% elements ) stop( "No calls present in object, regioning with CGHregions only work on called data" ) 
+if ( length(sampleNames) < 2 ) stop( "Object contains too few samples, regioning with CGHregions only works with at least two samples" ) 
+
+## analysis
+cgh <- makeCgh( qdnaseqObject, filter=TRUE )
+regions <- CGHregions( cgh, averror=0.00001 )
+outputData <- cbind( regions@featureData@data, regions@assayData$regions )
+
+## output
+write.table( outputData, outputFile, sep="\t", quote=FALSE, row.names=FALSE )
+
+## tell galaxy all seems ok
+q(status=0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-regioning.xml	Mon Jul 06 05:41:08 2015 -0400
@@ -0,0 +1,118 @@
+<tool id="QDNAseq-regioning" name="QDNAseq-regioning" version="0.0.1" force_history_refresh="True">
+  
+  <requirements>
+    
+    <!-- R 3.1.0 dependency will be used instead when available, now default R is used, see command -->
+    <!-- <requirement type="package" version="3.1.0">R</requirement> -->
+    <!-- <requirement type="package" version="1.2.2">qdnaseq</requirement> -->
+    
+  </requirements>
+
+  <description>Perform regioning on QDNAseq data</description>
+
+  <!-- change to /full/path/to/Rscript if required (eg /ccagc/lib/R/R-3.1.0/bin/Rscript) -->
+  <command interpreter="Rscript"> 
+    QDNAseq-regioning.R 
+    $cfg
+  </command>
+
+  <version_command interpreter="Rscript">QDNAseq-version.R</version_command>
+
+  <stdio>
+    <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
+    <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
+    <exit_code range="1:" level="fatal" description="R script finished too early, check log" />
+  </stdio>
+  
+  <inputs>
+    
+    <!-- ==================== -->
+    <!-- General inputs -->
+    <!-- ==================== -->
+    
+    <!-- Job name: must contain non-whitespace chars -->
+    <param name="output_name" type="text" optional="false" label="Analysis/ouput name" help="Supply a name for the outputs to remind you what they contain" value="TEST">
+      <!-- <validator type="empty_field" /> -->
+      <validator type="regex" message="No whitespace characters allowed">^[^\s\\]*$</validator>
+    </param>
+
+    <!-- ==================== -->
+    <!-- Input RDS -->
+    <!-- ==================== -->
+    <param name="rds_file_path" type="data" optional="False" format="rds" label="Input RDS file" help="RDS file should contain a QDNAseq R object (output of QDNAseq tool)" />
+
+    <param name="av_error" size="10" type="float" value="0.00001" optional="False" label="Average Error" help="By default low so that most if not all possible breakpoints in the data are kept, output will then contain smaller regions." />
+    
+  </inputs>
+
+  <!-- ==================== -->
+  <!-- Config file to pass params to R script -->
+  <!-- ==================== -->
+  <configfiles>
+    <configfile name="cfg">
+## Desc: this file is sourced in QDNAseq-region.R wrapper script
+##  as means to pass all galaxy params to R
+
+"${output_name}" -> outputName
+"${output_file}" -> outputFile
+"${rds_file_path}" -> rdsFilePath
+as.double( "${av_error}" ) -> avError
+
+    </configfile>
+  </configfiles>
+
+  <!-- ==================== -->
+  <!-- One text file as output -->
+  <!-- ==================== -->
+  <outputs>
+    <data format="tabular" name="output_file" label="QDNAseq: ${output_name} regions file" />
+  </outputs>
+
+  <help>
+
+**Introduction**
+
+This tool is a wrapper for the "CGHregions" function of the R Bioconductor package CGHregions_ and works on objects of the R bioconductor package QDNAseq_
+
+-----
+
+**What it does**
+
+**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, an (optional) output file of the QDNAseq galaxy tool. **Output:** Running this regioning tool overlaps the called called segments from all samples present in the input object. The output contains the merged segments with their call values for each sample.
+
+-----
+
+**Output examples**
+
+*Example TSV output:*
+
+::
+
+ Chromosome  Start    End      Nclone  AveDist  sample1  sample2
+ 1           6000001  7000000  42      0        1        -1
+ 1           7000001  8000000  11      0        0        -1
+
+-----
+
+.. class:: warningmark
+
+As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool.
+
+-----
+
+**Citation**
+
+For the underlying QDNAseq R package please cite: 
+Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114.
+
+See also the bioconductor documentation of QDNAseq_ and CGHregions_
+
+.. _CGHregions: http://http://www.bioconductor.org/packages/release/bioc/html/CGHregions.html
+.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+.. image:: LGG150_copynumber.png
+.. image:: LGG150_copynumberSegmented.png
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-version.R	Mon Jul 06 05:41:08 2015 -0400
@@ -0,0 +1,1 @@
+cat( packageDescription("QDNAseq", fields = "Version") )
\ No newline at end of file
--- a/QDNAseq.R	Mon Jul 06 05:38:44 2015 -0400
+++ b/QDNAseq.R	Mon Jul 06 05:41:08 2015 -0400
@@ -290,12 +290,12 @@
 
 	## proceed with segmenting / calling if requested
 	if ( doSegment ){
-		copyNumbersSegmented  <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD, transformFun="sqrt" )
+		copyNumbersSegmented  <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD, transformFun=c("sqrt") )
 		copyNumbersSegmented  <- normalizeSegmentedBins( copyNumbersSegmented )
 		outputData <- copyNumbersSegmented
 		outputType <- 'segments'
-		igvPath <- paste( outputPath, '/', rdsSegmName, sep='')
-		rdsPath <- paste( outputPath, '/', igvSegmName, sep='')
+		igvPath <- paste( outputPath, '/', igvSegmName, sep='')
+		rdsPath <- paste( outputPath, '/', rdsSegmName, sep='')
 	}
 	if ( doCall ){
 		copyNumbersCalled <- callBins(copyNumbersSegmented)
--- a/QDNAseq.xml	Mon Jul 06 05:38:44 2015 -0400
+++ b/QDNAseq.xml	Mon Jul 06 05:41:08 2015 -0400
@@ -19,6 +19,8 @@
     \$QDNASEQ_PATH
   </command>
 
+  <version_command interpreter="Rscript">QDNAseq-version.R</version_command>
+
   <stdio>
     <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
     <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
@@ -274,22 +276,22 @@
     <!-- WHY does there seem to be no way to use split() within this code in galaxy!!! -->
     <!-- now have to fall back to using unique names within binSizes instead of just integers -->
     <!-- Problem with integers is that both "1" and "5" are also present in eg "15,100" -->
-    <data format="tsv" name="txt_1000" label="QDNAseq: ${jobName} txt 1000kb">
+    <data format="tabular" name="txt_1000" label="QDNAseq: ${jobName} txt 1000kb">
       <filter>( "bin1000kb" in binSizes and txt2history == 'TRUE')</filter>
     </data>
-    <data format="tsv" name="txt_100" label="QDNAseq: ${jobName} txt 100kb">
+    <data format="tabular" name="txt_100" label="QDNAseq: ${jobName} txt 100kb">
       <filter>("bin100kb" in binSizes and txt2history == 'TRUE')</filter>
     </data>
-    <data format="tsv" name="txt_30" label="QDNAseq: ${jobName} txt 30kb">
+    <data format="tabular" name="txt_30" label="QDNAseq: ${jobName} txt 30kb">
       <filter>("bin30kb" in binSizes and txt2history == 'TRUE')</filter>
     </data>
-    <data format="tsv" name="txt_15" label="QDNAseq: ${jobName} txt 15kb">
+    <data format="tabular" name="txt_15" label="QDNAseq: ${jobName} txt 15kb">
       <filter>("bin15kb" in binSizes and txt2history == 'TRUE')</filter>
     </data>
-    <data format="tsv" name="txt_5" label="QDNAseq: ${jobName} txt 5kb">
+    <data format="tabular" name="txt_5" label="QDNAseq: ${jobName} txt 5kb">
       <filter>("bin5kb" in binSizes and txt2history == 'TRUE')</filter>
     </data>
-    <data format="tsv" name="txt_1" label="QDNAseq: ${jobName} txt 1kb">
+    <data format="tabular" name="txt_1" label="QDNAseq: ${jobName} txt 1kb">
       <filter>("bin1kb" in binSizes and txt2history == 'TRUE')</filter>
     </data>
 
@@ -356,7 +358,7 @@
 
 .. class:: warningmark
 
-Requires **internet access** for downloading bin-annotations from bitbucket and to show some styling (css) of the final report
+Some optional history input/output files are of format "rds" (file format to store a R object). This is not registered in galaxy by default, so has to be added to the available datatypes.
 
 -----
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Mon Jul 06 05:41:08 2015 -0400
@@ -0,0 +1,35 @@
+QDNAseq Galaxy tool
+====================
+
+This galaxy-tool is a wrapper for the R Bioconductor package [QDNAseq]
+
+It determines the copy number state of human chromosomes 1 - 22 for (shallow coverage) whole genome sequencing data.
+
+For questions/remarks about the bioconductor package itself, see also the code at [github].
+
+[QDNAseq]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+[github]: https://github.com/ccagc/QDNAseq
+
+
+Installation
+---------------------
+
+This tool should be installed into a galaxy installation from the (test)toolshed (tool-name: qdnaseq).
+
+
+Sample workflow
+---------------------
+
+You can test this tool at a galaxy installation by using built-in data by selecting the option "Run with test data" and press execute.
+
+
+Citation
+---------------------
+
+For the underlying QDNAseq R package please cite: 
+Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114.
+
+See also the bioconductor package [documentation].
+
+[documentation]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+