changeset 88:d2ea2b842c21 draft default tip

Uploaded
author stef
date Mon, 06 Jul 2015 06:29:19 -0400
parents 8391cdb7479c
children
files QDNAseq-export.R QDNAseq-export.xml QDNAseq-plot.R QDNAseq-regioning.R QDNAseq-regioning.xml README.md
diffstat 6 files changed, 457 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-export.R	Mon Jul 06 06:29:19 2015 -0400
@@ -0,0 +1,62 @@
+#!/usr/bin/Rscript
+
+## --------------------
+## prints all arguments as msg
+## --------------------
+catMsg <- function( msg=c() ){	
+	cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
+}
+
+## ==================================================
+## Start of analysis
+## ==================================================
+MAIN_NAME <- '[INFO] '
+catMsg( "Starting QDNAseq-export wrapper" )
+
+## supress msg to allow R to finish with non-error msg
+catMsg( "Loading R libraries" )
+suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
+suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
+
+## only one param: the tmp config file
+cmdLineArgs <- commandArgs(TRUE)
+config      <- cmdLineArgs[1]
+
+## sourcing the config file will load all input params
+## many variables are imported via sourced "config"
+source( config ) # outputFile, outputName, outputFormat, sampleIndex, filterBins
+
+systemUser <- system("whoami",T)
+qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
+rVersion <- R.version.string
+rPath <- R.home()
+catMsg( c("QDNAseq version ", qdnaseqVersion) )
+catMsg( c( rVersion ) )
+qdnaseqObject <- readRDS( rdsFilePath )
+logTransform <- TRUE
+
+sampleNames <- sampleNames( qdnaseqObject )
+elements <- assayDataElementNames(qdnaseqObject)
+element <- dataLevel
+if (dataLevel == "segments") element <- "segmented"
+if (element == "calls") logTransform <- FALSE
+
+## sanity checks
+if ( ! element %in% elements ) stop( paste( "Data-level \"", element, "\" not present in object", sep='') ) 
+if ( outputFormat == "bed" ){
+	if ( sampleIndex == "None") stop("Bed option requires sample index")
+	if ( sampleIndex > length(sampleNames) ) stop("Chosen sample index not present in object")
+	qdnaseqObject <- qdnaseqObject[ ,sampleIndex]
+}
+
+## output
+exportBins( qdnaseqObject, 
+	file=outputFile, 
+	format=outputFormat, 
+	filter=filterBins, 
+	type=dataLevel, 
+	logTransform=logTransform 
+)
+
+## tell galaxy all seems ok
+q(status=0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-export.xml	Mon Jul 06 06:29:19 2015 -0400
@@ -0,0 +1,186 @@
+<tool id="QDNAseq-export" name="QDNAseq-export" version="0.0.1" force_history_refresh="True">
+  
+  <requirements>
+    
+    <!-- R 3.1.0 dependency will be used instead when available, now default R is used, see command -->
+    <!-- <requirement type="package" version="3.1.0">R</requirement> -->
+    <!-- <requirement type="package" version="1.2.2">qdnaseq</requirement> -->
+    
+  </requirements>
+
+  <description>Export QDNAseq data to tabular</description>
+
+  <!-- change to /full/path/to/Rscript if required (eg /ccagc/lib/R/R-3.1.0/bin/Rscript) -->
+  <command interpreter="Rscript"> 
+    QDNAseq-export.R 
+    $cfg
+  </command>
+
+  <version_command interpreter="Rscript">QDNAseq-version.R</version_command>
+
+  <stdio>
+    <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
+    <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
+    <exit_code range="1:" level="fatal" description="R script finished too early, check log" />
+  </stdio>
+  
+  <inputs>
+    
+    <!-- ==================== -->
+    <!-- General inputs -->
+    <!-- ==================== -->
+    
+    <!-- Job name: must contain non-whitespace chars -->
+    <param name="jobName" type="text" optional="false" label="Analysis/ouput name" help="Supply a name for the outputs to remind you what they contain" value="TEST">
+      <!-- <validator type="empty_field" /> -->
+      <validator type="regex" message="No whitespace characters allowed">^[^\s\\]*$</validator>
+    </param>
+
+    <!-- ==================== -->
+    <!-- Input RDS -->
+    <!-- ==================== -->
+    <param name="rdsFile" type="data" optional="False" format="rds" label="Input RDS file" help="RDS file should contain a QDNAseq R object (output of QDNAseq tool)" />
+
+    <!-- ==================== -->
+    <!-- Data level -->
+    <!-- ==================== -->
+    <param name="data_level" type="select" label="Level of data to export" help="If segmentation and/or calling has been performed the segmented or called values can be exported instead of copynumber (normalized read counts)">
+      <option value="copynumber">copynumbers</option>
+      <option value="segments">segments</option>
+      <option value="calls">calls</option>
+    </param>
+
+    <!-- ==================== -->
+    <!-- Include filtered bins or not -->
+    <!-- ==================== -->
+    <param name="filter_bins" type="select" label="Also output copynumber RDS files to history" help="Each bin has a filter status. By default the bins that were previously ignored by the analysis before are not send to the output. Set to 'include' if you want to include those as well">
+      <option value="TRUE">Exclude filtered bins</option>
+      <option value="FALSE">Include filtered bins</option>
+    </param>    	
+
+    <!-- ==================== -->
+    <!-- Output type -->
+    <!-- ==================== -->
+    <conditional name="output_format_selection">
+      <param name="output_format" type="select" label="Plot all samples in RDS object or choose one" help="All output is tabular, but depending on downstream use some formats are more handy than others">
+        <option value="tsv">TSV</option>
+        <option value="igv">IGV</option>
+        <option value="bed">BED</option>
+      </param>
+      <when value="bed">
+        <param name="sample_index" type="integer" required="True" value="1" label="sample-index (integer)" help="The object can host muliple samples while the BED option can only export one. Therefor a sample index must be chosen for this output." />
+      </when>
+      <when value="tsv">
+        <param name="sample_index" type="hidden" value="" />
+      </when>
+      <when value="igv">
+        <param name="sample_index" type="hidden" value="" />
+      </when>
+      
+    </conditional> 
+    
+  </inputs>
+
+  <!-- ==================== -->
+  <!-- Config file to pass params to R script -->
+  <!-- ==================== -->
+  <configfiles>
+    <configfile name="cfg">
+## Desc: this file is sourced in QDNAseq-export.R wrapper script
+##  as means to pass all galaxy params to R
+
+"${jobName}" -> outputName
+"${output_file}" -> outputFile
+"${data_level}" -> dataLevel
+"${output_format_selection.output_format}" -> outputFormat
+"${rdsFile}" -> rdsFilePath
+as.integer( "${output_format_selection.sample_index}" ) -> sampleIndex
+as.logical( "${filter_bins}" ) -> filterBins
+
+    </configfile>
+  </configfiles>
+
+  <!-- ==================== -->
+  <!-- One text file as output -->
+  <!-- ==================== -->
+  <outputs>
+    <data format="tabular" name="output_file" label="QDNAseq: ${jobName} export file" />
+  </outputs>
+
+  <help>
+
+**Introduction**
+
+This tool is a wrapper for the "exportBins" function of the R Bioconductor package QDNAseq_
+
+.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+-----
+
+**What it does**
+
+**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, a (optional) output file of the QDNAseq galaxy tool. **Output:** Running this export tool provides you with one output text file. When either TSV or IGV is chosen as output format, the output file contains data of all samples present in the object. When BED is chosen as output format, output contains only one sample (by default the first). **OutputDataLevel:** The output data can be of three levels. If the object contains segmented and/or call values these can be chosen instead of the default copynumber (log2 transformed normalized read counts).
+
+-----
+
+**Output examples**
+
+*Example BED output:*
+
+::
+
+ track name="SAMPLE1.bam" description="copynumber"
+ 1  6000000  7000000  1:6000001-7000000  1.293  +
+ 1  7000000  8000000  1:7000001-8000000  1.335  +
+
+*Example TSV output:*
+
+::
+
+ feature            chr start    end      SAMPLE1.bam  SAMPLE2.bam
+ 1:6000001-7000000  1	6000001  7000000  1.293	       -0.979
+ 1:7000001-8000000  1	7000001  8000000  1.335        -1.022
+
+*Example IGV output (at segmented level):*
+
+::
+
+ #type=COPY_NUMBER
+ #track coords=1
+ chr  start    end      feature            SAMPLE1.bam  SAMPLE2.bam
+ 1    6000001  7000000  1:6000001-7000000  1.314        -1.0005
+ 1    7000001  8000000  1:7000001-8000000  1.314        -1.0005
+
+*Example IGV output (at called level):*
+
+::
+
+ #type=COPY_NUMBER
+ #track coords=1
+ chr  start    end      feature            SAMPLE1.bam  SAMPLE2.bam
+ 1    6000001  7000000  1:6000001-7000000  1            -1
+ 1    7000001  8000000  1:7000001-8000000  1            -1
+
+-----
+
+.. class:: warningmark
+
+As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool.
+
+-----
+
+**Citation**
+
+For the underlying QDNAseq R package please cite: 
+Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114.
+
+See also the bioconductor package_ documentation.
+
+.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+.. image:: LGG150_copynumber.png
+.. image:: LGG150_copynumberSegmented.png
+
+  </help>
+
+</tool>
--- a/QDNAseq-plot.R	Thu May 28 14:19:57 2015 -0400
+++ b/QDNAseq-plot.R	Mon Jul 06 06:29:19 2015 -0400
@@ -51,13 +51,13 @@
 	if ( whatToPlot == 'everything' ){
 		catMsg( c( "Plotting all data in object" ) )
 		plot( qdnaseqObject[ ,sample ], ylab=ylab_text )
-		abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+		abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
 	} else if( whatToPlot == 'chromosomes' ){
 		catMsg( c( "Plotting subset of chromosomes" ) )
 		fdata <- qdnaseqObject@featureData@data
 		idx_region <- which( fdata$chromosome %in% chromosomesToPlot )
 		plot( qdnaseqObject[ idx_region, sample ], ylab=ylab_text )
-		abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
+		abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" )
 	} else if( whatToPlot == 'region' ){
 		regionC <- chrName
 		regionS <- chrStart
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-regioning.R	Mon Jul 06 06:29:19 2015 -0400
@@ -0,0 +1,54 @@
+#!/usr/bin/Rscript
+
+## --------------------
+## prints all arguments as msg
+## --------------------
+catMsg <- function( msg=c() ){	
+	cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='')
+}
+
+## ==================================================
+## Start of analysis
+## ==================================================
+MAIN_NAME <- '[INFO] '
+catMsg( "Starting QDNAseq-export wrapper" )
+
+## supress msg to allow R to finish with non-error msg
+catMsg( "Loading R libraries" )
+suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) )
+suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) )
+suppressWarnings( suppressMessages( library( CGHregions, quietly = TRUE ) ) )
+
+## only one param: the tmp config file
+cmdLineArgs <- commandArgs(TRUE)
+config      <- cmdLineArgs[1]
+
+## sourcing the config file will load all input params
+## many variables are imported via sourced "config"
+source( config ) # outputFile, outputName
+
+systemUser <- system("whoami",T)
+qdnaseqVersion <- packageDescription( "QDNAseq" )$Version
+rVersion <- R.version.string
+rPath <- R.home()
+catMsg( c("QDNAseq version ", qdnaseqVersion) )
+catMsg( c( rVersion ) )
+
+qdnaseqObject <- readRDS( rdsFilePath )
+sampleNames <- sampleNames( qdnaseqObject )
+
+## sanity checks
+elements <- assayDataElementNames(qdnaseqObject)
+if ( ! "calls" %in% elements ) stop( "No calls present in object, regioning with CGHregions only work on called data" ) 
+if ( length(sampleNames) < 2 ) stop( "Object contains too few samples, regioning with CGHregions only works with at least two samples" ) 
+
+## analysis
+cgh <- makeCgh( qdnaseqObject, filter=TRUE )
+regions <- CGHregions( cgh, averror=0.00001 )
+outputData <- cbind( regions@featureData@data, regions@assayData$regions )
+
+## output
+write.table( outputData, outputFile, sep="\t", quote=FALSE, row.names=FALSE )
+
+## tell galaxy all seems ok
+q(status=0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/QDNAseq-regioning.xml	Mon Jul 06 06:29:19 2015 -0400
@@ -0,0 +1,118 @@
+<tool id="QDNAseq-regioning" name="QDNAseq-regioning" version="0.0.1" force_history_refresh="True">
+  
+  <requirements>
+    
+    <!-- R 3.1.0 dependency will be used instead when available, now default R is used, see command -->
+    <!-- <requirement type="package" version="3.1.0">R</requirement> -->
+    <!-- <requirement type="package" version="1.2.2">qdnaseq</requirement> -->
+    
+  </requirements>
+
+  <description>Perform regioning on QDNAseq data</description>
+
+  <!-- change to /full/path/to/Rscript if required (eg /ccagc/lib/R/R-3.1.0/bin/Rscript) -->
+  <command interpreter="Rscript"> 
+    QDNAseq-regioning.R 
+    $cfg
+  </command>
+
+  <version_command interpreter="Rscript">QDNAseq-version.R</version_command>
+
+  <stdio>
+    <!-- Anything higher than 0 means the R script didnt finish (correctly) -->
+    <!-- Because different R packages deal with err/warn differently unable to waterproof this -->
+    <exit_code range="1:" level="fatal" description="R script finished too early, check log" />
+  </stdio>
+  
+  <inputs>
+    
+    <!-- ==================== -->
+    <!-- General inputs -->
+    <!-- ==================== -->
+    
+    <!-- Job name: must contain non-whitespace chars -->
+    <param name="output_name" type="text" optional="false" label="Analysis/ouput name" help="Supply a name for the outputs to remind you what they contain" value="TEST">
+      <!-- <validator type="empty_field" /> -->
+      <validator type="regex" message="No whitespace characters allowed">^[^\s\\]*$</validator>
+    </param>
+
+    <!-- ==================== -->
+    <!-- Input RDS -->
+    <!-- ==================== -->
+    <param name="rds_file_path" type="data" optional="False" format="rds" label="Input RDS file" help="RDS file should contain a QDNAseq R object (output of QDNAseq tool)" />
+
+    <param name="av_error" size="10" type="float" value="0.00001" optional="False" label="Average Error" help="By default low so that most if not all possible breakpoints in the data are kept, output will then contain smaller regions." />
+    
+  </inputs>
+
+  <!-- ==================== -->
+  <!-- Config file to pass params to R script -->
+  <!-- ==================== -->
+  <configfiles>
+    <configfile name="cfg">
+## Desc: this file is sourced in QDNAseq-region.R wrapper script
+##  as means to pass all galaxy params to R
+
+"${output_name}" -> outputName
+"${output_file}" -> outputFile
+"${rds_file_path}" -> rdsFilePath
+as.double( "${av_error}" ) -> avError
+
+    </configfile>
+  </configfiles>
+
+  <!-- ==================== -->
+  <!-- One text file as output -->
+  <!-- ==================== -->
+  <outputs>
+    <data format="tabular" name="output_file" label="QDNAseq: ${output_name} regions file" />
+  </outputs>
+
+  <help>
+
+**Introduction**
+
+This tool is a wrapper for the "CGHregions" function of the R Bioconductor package CGHregions_ and works on objects of the R bioconductor package QDNAseq_
+
+-----
+
+**What it does**
+
+**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, an (optional) output file of the QDNAseq galaxy tool. **Output:** Running this regioning tool overlaps the called called segments from all samples present in the input object. The output contains the merged segments with their call values for each sample.
+
+-----
+
+**Output examples**
+
+*Example TSV output:*
+
+::
+
+ Chromosome  Start    End      Nclone  AveDist  sample1  sample2
+ 1           6000001  7000000  42      0        1        -1
+ 1           7000001  8000000  11      0        0        -1
+
+-----
+
+.. class:: warningmark
+
+As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool.
+
+-----
+
+**Citation**
+
+For the underlying QDNAseq R package please cite: 
+Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114.
+
+See also the bioconductor documentation of QDNAseq_ and CGHregions_
+
+.. _CGHregions: http://http://www.bioconductor.org/packages/release/bioc/html/CGHregions.html
+.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+
+.. image:: LGG150_copynumber.png
+.. image:: LGG150_copynumberSegmented.png
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Mon Jul 06 06:29:19 2015 -0400
@@ -0,0 +1,35 @@
+QDNAseq Galaxy tool
+====================
+
+This galaxy-tool is a wrapper for the R Bioconductor package [QDNAseq]
+
+It determines the copy number state of human chromosomes 1 - 22 for (shallow coverage) whole genome sequencing data.
+
+For questions/remarks about the bioconductor package itself, see also the code at [github].
+
+[QDNAseq]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+[github]: https://github.com/ccagc/QDNAseq
+
+
+Installation
+---------------------
+
+This tool should be installed into a galaxy installation from the (test)toolshed (tool-name: qdnaseq).
+
+
+Sample workflow
+---------------------
+
+You can test this tool at a galaxy installation by using built-in data by selecting the option "Run with test data" and press execute.
+
+
+Citation
+---------------------
+
+For the underlying QDNAseq R package please cite: 
+Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114.
+
+See also the bioconductor package [documentation].
+
+[documentation]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html
+