# HG changeset patch # User stef # Date 1436178559 14400 # Node ID d2ea2b842c2181e2e2310fad3702ce0ddf935fb5 # Parent 8391cdb7479c6d046af14bd2b369e927211cc726 Uploaded diff -r 8391cdb7479c -r d2ea2b842c21 QDNAseq-export.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-export.R Mon Jul 06 06:29:19 2015 -0400 @@ -0,0 +1,62 @@ +#!/usr/bin/Rscript + +## -------------------- +## prints all arguments as msg +## -------------------- +catMsg <- function( msg=c() ){ + cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='') +} + +## ================================================== +## Start of analysis +## ================================================== +MAIN_NAME <- '[INFO] ' +catMsg( "Starting QDNAseq-export wrapper" ) + +## supress msg to allow R to finish with non-error msg +catMsg( "Loading R libraries" ) +suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) ) +suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) ) + +## only one param: the tmp config file +cmdLineArgs <- commandArgs(TRUE) +config <- cmdLineArgs[1] + +## sourcing the config file will load all input params +## many variables are imported via sourced "config" +source( config ) # outputFile, outputName, outputFormat, sampleIndex, filterBins + +systemUser <- system("whoami",T) +qdnaseqVersion <- packageDescription( "QDNAseq" )$Version +rVersion <- R.version.string +rPath <- R.home() +catMsg( c("QDNAseq version ", qdnaseqVersion) ) +catMsg( c( rVersion ) ) +qdnaseqObject <- readRDS( rdsFilePath ) +logTransform <- TRUE + +sampleNames <- sampleNames( qdnaseqObject ) +elements <- assayDataElementNames(qdnaseqObject) +element <- dataLevel +if (dataLevel == "segments") element <- "segmented" +if (element == "calls") logTransform <- FALSE + +## sanity checks +if ( ! element %in% elements ) stop( paste( "Data-level \"", element, "\" not present in object", sep='') ) +if ( outputFormat == "bed" ){ + if ( sampleIndex == "None") stop("Bed option requires sample index") + if ( sampleIndex > length(sampleNames) ) stop("Chosen sample index not present in object") + qdnaseqObject <- qdnaseqObject[ ,sampleIndex] +} + +## output +exportBins( qdnaseqObject, + file=outputFile, + format=outputFormat, + filter=filterBins, + type=dataLevel, + logTransform=logTransform +) + +## tell galaxy all seems ok +q(status=0) diff -r 8391cdb7479c -r d2ea2b842c21 QDNAseq-export.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-export.xml Mon Jul 06 06:29:19 2015 -0400 @@ -0,0 +1,186 @@ + + + + + + + + + + + Export QDNAseq data to tabular + + + + QDNAseq-export.R + $cfg + + + QDNAseq-version.R + + + + + + + + + + + + + + + + + ^[^\s\\]*$ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## Desc: this file is sourced in QDNAseq-export.R wrapper script +## as means to pass all galaxy params to R + +"${jobName}" -> outputName +"${output_file}" -> outputFile +"${data_level}" -> dataLevel +"${output_format_selection.output_format}" -> outputFormat +"${rdsFile}" -> rdsFilePath +as.integer( "${output_format_selection.sample_index}" ) -> sampleIndex +as.logical( "${filter_bins}" ) -> filterBins + + + + + + + + + + + + + +**Introduction** + +This tool is a wrapper for the "exportBins" function of the R Bioconductor package QDNAseq_ + +.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html + +----- + +**What it does** + +**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, a (optional) output file of the QDNAseq galaxy tool. **Output:** Running this export tool provides you with one output text file. When either TSV or IGV is chosen as output format, the output file contains data of all samples present in the object. When BED is chosen as output format, output contains only one sample (by default the first). **OutputDataLevel:** The output data can be of three levels. If the object contains segmented and/or call values these can be chosen instead of the default copynumber (log2 transformed normalized read counts). + +----- + +**Output examples** + +*Example BED output:* + +:: + + track name="SAMPLE1.bam" description="copynumber" + 1 6000000 7000000 1:6000001-7000000 1.293 + + 1 7000000 8000000 1:7000001-8000000 1.335 + + +*Example TSV output:* + +:: + + feature chr start end SAMPLE1.bam SAMPLE2.bam + 1:6000001-7000000 1 6000001 7000000 1.293 -0.979 + 1:7000001-8000000 1 7000001 8000000 1.335 -1.022 + +*Example IGV output (at segmented level):* + +:: + + #type=COPY_NUMBER + #track coords=1 + chr start end feature SAMPLE1.bam SAMPLE2.bam + 1 6000001 7000000 1:6000001-7000000 1.314 -1.0005 + 1 7000001 8000000 1:7000001-8000000 1.314 -1.0005 + +*Example IGV output (at called level):* + +:: + + #type=COPY_NUMBER + #track coords=1 + chr start end feature SAMPLE1.bam SAMPLE2.bam + 1 6000001 7000000 1:6000001-7000000 1 -1 + 1 7000001 8000000 1:7000001-8000000 1 -1 + +----- + +.. class:: warningmark + +As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool. + +----- + +**Citation** + +For the underlying QDNAseq R package please cite: +Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114. + +See also the bioconductor package_ documentation. + +.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html + +.. image:: LGG150_copynumber.png +.. image:: LGG150_copynumberSegmented.png + + + + diff -r 8391cdb7479c -r d2ea2b842c21 QDNAseq-plot.R --- a/QDNAseq-plot.R Thu May 28 14:19:57 2015 -0400 +++ b/QDNAseq-plot.R Mon Jul 06 06:29:19 2015 -0400 @@ -51,13 +51,13 @@ if ( whatToPlot == 'everything' ){ catMsg( c( "Plotting all data in object" ) ) plot( qdnaseqObject[ ,sample ], ylab=ylab_text ) - abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) } else if( whatToPlot == 'chromosomes' ){ catMsg( c( "Plotting subset of chromosomes" ) ) fdata <- qdnaseqObject@featureData@data idx_region <- which( fdata$chromosome %in% chromosomesToPlot ) plot( qdnaseqObject[ idx_region, sample ], ylab=ylab_text ) - abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) } else if( whatToPlot == 'region' ){ regionC <- chrName regionS <- chrStart diff -r 8391cdb7479c -r d2ea2b842c21 QDNAseq-regioning.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-regioning.R Mon Jul 06 06:29:19 2015 -0400 @@ -0,0 +1,54 @@ +#!/usr/bin/Rscript + +## -------------------- +## prints all arguments as msg +## -------------------- +catMsg <- function( msg=c() ){ + cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='') +} + +## ================================================== +## Start of analysis +## ================================================== +MAIN_NAME <- '[INFO] ' +catMsg( "Starting QDNAseq-export wrapper" ) + +## supress msg to allow R to finish with non-error msg +catMsg( "Loading R libraries" ) +suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) ) +suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) ) +suppressWarnings( suppressMessages( library( CGHregions, quietly = TRUE ) ) ) + +## only one param: the tmp config file +cmdLineArgs <- commandArgs(TRUE) +config <- cmdLineArgs[1] + +## sourcing the config file will load all input params +## many variables are imported via sourced "config" +source( config ) # outputFile, outputName + +systemUser <- system("whoami",T) +qdnaseqVersion <- packageDescription( "QDNAseq" )$Version +rVersion <- R.version.string +rPath <- R.home() +catMsg( c("QDNAseq version ", qdnaseqVersion) ) +catMsg( c( rVersion ) ) + +qdnaseqObject <- readRDS( rdsFilePath ) +sampleNames <- sampleNames( qdnaseqObject ) + +## sanity checks +elements <- assayDataElementNames(qdnaseqObject) +if ( ! "calls" %in% elements ) stop( "No calls present in object, regioning with CGHregions only work on called data" ) +if ( length(sampleNames) < 2 ) stop( "Object contains too few samples, regioning with CGHregions only works with at least two samples" ) + +## analysis +cgh <- makeCgh( qdnaseqObject, filter=TRUE ) +regions <- CGHregions( cgh, averror=0.00001 ) +outputData <- cbind( regions@featureData@data, regions@assayData$regions ) + +## output +write.table( outputData, outputFile, sep="\t", quote=FALSE, row.names=FALSE ) + +## tell galaxy all seems ok +q(status=0) diff -r 8391cdb7479c -r d2ea2b842c21 QDNAseq-regioning.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-regioning.xml Mon Jul 06 06:29:19 2015 -0400 @@ -0,0 +1,118 @@ + + + + + + + + + + + Perform regioning on QDNAseq data + + + + QDNAseq-regioning.R + $cfg + + + QDNAseq-version.R + + + + + + + + + + + + + + + + + ^[^\s\\]*$ + + + + + + + + + + + + + + + + +## Desc: this file is sourced in QDNAseq-region.R wrapper script +## as means to pass all galaxy params to R + +"${output_name}" -> outputName +"${output_file}" -> outputFile +"${rds_file_path}" -> rdsFilePath +as.double( "${av_error}" ) -> avError + + + + + + + + + + + + + +**Introduction** + +This tool is a wrapper for the "CGHregions" function of the R Bioconductor package CGHregions_ and works on objects of the R bioconductor package QDNAseq_ + +----- + +**What it does** + +**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, an (optional) output file of the QDNAseq galaxy tool. **Output:** Running this regioning tool overlaps the called called segments from all samples present in the input object. The output contains the merged segments with their call values for each sample. + +----- + +**Output examples** + +*Example TSV output:* + +:: + + Chromosome Start End Nclone AveDist sample1 sample2 + 1 6000001 7000000 42 0 1 -1 + 1 7000001 8000000 11 0 0 -1 + +----- + +.. class:: warningmark + +As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool. + +----- + +**Citation** + +For the underlying QDNAseq R package please cite: +Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114. + +See also the bioconductor documentation of QDNAseq_ and CGHregions_ + +.. _CGHregions: http://http://www.bioconductor.org/packages/release/bioc/html/CGHregions.html +.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html + +.. image:: LGG150_copynumber.png +.. image:: LGG150_copynumberSegmented.png + + + + diff -r 8391cdb7479c -r d2ea2b842c21 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Jul 06 06:29:19 2015 -0400 @@ -0,0 +1,35 @@ +QDNAseq Galaxy tool +==================== + +This galaxy-tool is a wrapper for the R Bioconductor package [QDNAseq] + +It determines the copy number state of human chromosomes 1 - 22 for (shallow coverage) whole genome sequencing data. + +For questions/remarks about the bioconductor package itself, see also the code at [github]. + +[QDNAseq]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html +[github]: https://github.com/ccagc/QDNAseq + + +Installation +--------------------- + +This tool should be installed into a galaxy installation from the (test)toolshed (tool-name: qdnaseq). + + +Sample workflow +--------------------- + +You can test this tool at a galaxy installation by using built-in data by selecting the option "Run with test data" and press execute. + + +Citation +--------------------- + +For the underlying QDNAseq R package please cite: +Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114. + +See also the bioconductor package [documentation]. + +[documentation]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html +