# HG changeset patch # User stef # Date 1436175668 14400 # Node ID f89205f51e2784fa60ab8c8c8b22fa66fccd49fc # Parent 06928963138101a69c868099d7cd159f8765f5b8 Uploaded diff -r 069289631381 -r f89205f51e27 QDNAseq-export.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-export.R Mon Jul 06 05:41:08 2015 -0400 @@ -0,0 +1,62 @@ +#!/usr/bin/Rscript + +## -------------------- +## prints all arguments as msg +## -------------------- +catMsg <- function( msg=c() ){ + cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='') +} + +## ================================================== +## Start of analysis +## ================================================== +MAIN_NAME <- '[INFO] ' +catMsg( "Starting QDNAseq-export wrapper" ) + +## supress msg to allow R to finish with non-error msg +catMsg( "Loading R libraries" ) +suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) ) +suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) ) + +## only one param: the tmp config file +cmdLineArgs <- commandArgs(TRUE) +config <- cmdLineArgs[1] + +## sourcing the config file will load all input params +## many variables are imported via sourced "config" +source( config ) # outputFile, outputName, outputFormat, sampleIndex, filterBins + +systemUser <- system("whoami",T) +qdnaseqVersion <- packageDescription( "QDNAseq" )$Version +rVersion <- R.version.string +rPath <- R.home() +catMsg( c("QDNAseq version ", qdnaseqVersion) ) +catMsg( c( rVersion ) ) +qdnaseqObject <- readRDS( rdsFilePath ) +logTransform <- TRUE + +sampleNames <- sampleNames( qdnaseqObject ) +elements <- assayDataElementNames(qdnaseqObject) +element <- dataLevel +if (dataLevel == "segments") element <- "segmented" +if (element == "calls") logTransform <- FALSE + +## sanity checks +if ( ! element %in% elements ) stop( paste( "Data-level \"", element, "\" not present in object", sep='') ) +if ( outputFormat == "bed" ){ + if ( sampleIndex == "None") stop("Bed option requires sample index") + if ( sampleIndex > length(sampleNames) ) stop("Chosen sample index not present in object") + qdnaseqObject <- qdnaseqObject[ ,sampleIndex] +} + +## output +exportBins( qdnaseqObject, + file=outputFile, + format=outputFormat, + filter=filterBins, + type=dataLevel, + logTransform=logTransform +) + +## tell galaxy all seems ok +q(status=0) diff -r 069289631381 -r f89205f51e27 QDNAseq-export.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-export.xml Mon Jul 06 05:41:08 2015 -0400 @@ -0,0 +1,186 @@ + + + + + + + + + + + Export QDNAseq data to tabular + + + + QDNAseq-export.R + $cfg + + + QDNAseq-version.R + + + + + + + + + + + + + + + + + ^[^\s\\]*$ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +## Desc: this file is sourced in QDNAseq-export.R wrapper script +## as means to pass all galaxy params to R + +"${jobName}" -> outputName +"${output_file}" -> outputFile +"${data_level}" -> dataLevel +"${output_format_selection.output_format}" -> outputFormat +"${rdsFile}" -> rdsFilePath +as.integer( "${output_format_selection.sample_index}" ) -> sampleIndex +as.logical( "${filter_bins}" ) -> filterBins + + + + + + + + + + + + + +**Introduction** + +This tool is a wrapper for the "exportBins" function of the R Bioconductor package QDNAseq_ + +.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html + +----- + +**What it does** + +**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, a (optional) output file of the QDNAseq galaxy tool. **Output:** Running this export tool provides you with one output text file. When either TSV or IGV is chosen as output format, the output file contains data of all samples present in the object. When BED is chosen as output format, output contains only one sample (by default the first). **OutputDataLevel:** The output data can be of three levels. If the object contains segmented and/or call values these can be chosen instead of the default copynumber (log2 transformed normalized read counts). + +----- + +**Output examples** + +*Example BED output:* + +:: + + track name="SAMPLE1.bam" description="copynumber" + 1 6000000 7000000 1:6000001-7000000 1.293 + + 1 7000000 8000000 1:7000001-8000000 1.335 + + +*Example TSV output:* + +:: + + feature chr start end SAMPLE1.bam SAMPLE2.bam + 1:6000001-7000000 1 6000001 7000000 1.293 -0.979 + 1:7000001-8000000 1 7000001 8000000 1.335 -1.022 + +*Example IGV output (at segmented level):* + +:: + + #type=COPY_NUMBER + #track coords=1 + chr start end feature SAMPLE1.bam SAMPLE2.bam + 1 6000001 7000000 1:6000001-7000000 1.314 -1.0005 + 1 7000001 8000000 1:7000001-8000000 1.314 -1.0005 + +*Example IGV output (at called level):* + +:: + + #type=COPY_NUMBER + #track coords=1 + chr start end feature SAMPLE1.bam SAMPLE2.bam + 1 6000001 7000000 1:6000001-7000000 1 -1 + 1 7000001 8000000 1:7000001-8000000 1 -1 + +----- + +.. class:: warningmark + +As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool. + +----- + +**Citation** + +For the underlying QDNAseq R package please cite: +Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114. + +See also the bioconductor package_ documentation. + +.. _package: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html + +.. image:: LGG150_copynumber.png +.. image:: LGG150_copynumberSegmented.png + + + + diff -r 069289631381 -r f89205f51e27 QDNAseq-plot.R --- a/QDNAseq-plot.R Mon Jul 06 05:38:44 2015 -0400 +++ b/QDNAseq-plot.R Mon Jul 06 05:41:08 2015 -0400 @@ -37,13 +37,42 @@ catMsg( c( rVersion ) ) qdnaseqObject <- readRDS( rdsFilePath ) +chromosomesToPlot <- unlist( strsplit( chromosomesToPlotString, ",") ) + +#cat( "CHROM: ", chromosomesToPlotString, "\n" ) +#cat( "REGION: ", regionToPlotString, "\n" ) +#cat( "What to plot: ", whatToPlot, "\n" ) + ## COPYNUMBER PLOT sample <- SAMPLE_INDEX png( outputPngPath, width=PLOT_WIDTH, height=PLOT_HEIGHT, res=PLOT_RES ) par( PAR_SET ) - plot( qdnaseqObject[ ,sample ] ) + ylab_text <- "log2 read counts" + if ( whatToPlot == 'everything' ){ + catMsg( c( "Plotting all data in object" ) ) + plot( qdnaseqObject[ ,sample ], ylab=ylab_text ) + abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + } else if( whatToPlot == 'chromosomes' ){ + catMsg( c( "Plotting subset of chromosomes" ) ) + fdata <- qdnaseqObject@featureData@data + idx_region <- which( fdata$chromosome %in% chromosomesToPlot ) + plot( qdnaseqObject[ idx_region, sample ], ylab=ylab_text ) + abline( h=c(-4,-3,-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + } else if( whatToPlot == 'region' ){ + regionC <- chrName + regionS <- chrStart + regionE <- chrEnd + if ( regionS > regionE ) stop("Chosen start is > end") + catMsg( c( "Plotting genomic region (chr=", regionC, " start=", regionS, " end=", regionE, ")" ) ) + + fdata <- qdnaseqObject@featureData@data + idx_region <- which( fdata$chromosome == regionC & fdata$start > regionS & fdata$end < regionE ) + cat( idx_region, "\n") + + plot( qdnaseqObject[ idx_region, sample ], doCalls=FALSE, ylab=ylab_text ) + } #mtext( "plotted in galaxy", 3 ) - abline( h=c(-2,-1,1,2,3,4), lty=1, lwd=0.5, col="grey" ) + dev.off() diff -r 069289631381 -r f89205f51e27 QDNAseq-plot.xml --- a/QDNAseq-plot.xml Mon Jul 06 05:38:44 2015 -0400 +++ b/QDNAseq-plot.xml Mon Jul 06 05:41:08 2015 -0400 @@ -16,6 +16,8 @@ $cfg + QDNAseq-version.R + @@ -57,7 +59,57 @@ --> - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -98,6 +150,11 @@ "${outputPng}" -> outputPngPath "${rdsFile}" -> rdsFilePath as.integer( "${sample_index}" ) -> SAMPLE_INDEX +"${subset_selection.what_to_plot}" -> whatToPlot +"${subset_selection.chromosomes}" -> chromosomesToPlotString +as.integer( "${subset_selection.chr_name}" ) -> chrName +as.integer( "${subset_selection.chr_start}" ) -> chrStart +as.integer( "${subset_selection.chr_end}" ) -> chrEnd ## ----- ## extra options diff -r 069289631381 -r f89205f51e27 QDNAseq-regioning.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-regioning.R Mon Jul 06 05:41:08 2015 -0400 @@ -0,0 +1,54 @@ +#!/usr/bin/Rscript + +## -------------------- +## prints all arguments as msg +## -------------------- +catMsg <- function( msg=c() ){ + cat( MAIN_NAME, paste( msg, collapse="" ), "\n", sep='') +} + +## ================================================== +## Start of analysis +## ================================================== +MAIN_NAME <- '[INFO] ' +catMsg( "Starting QDNAseq-export wrapper" ) + +## supress msg to allow R to finish with non-error msg +catMsg( "Loading R libraries" ) +suppressWarnings( suppressMessages( library( QDNAseq, quietly = TRUE ) ) ) +suppressWarnings( suppressMessages( library( CGHcall, quietly = TRUE ) ) ) +suppressWarnings( suppressMessages( library( CGHregions, quietly = TRUE ) ) ) + +## only one param: the tmp config file +cmdLineArgs <- commandArgs(TRUE) +config <- cmdLineArgs[1] + +## sourcing the config file will load all input params +## many variables are imported via sourced "config" +source( config ) # outputFile, outputName + +systemUser <- system("whoami",T) +qdnaseqVersion <- packageDescription( "QDNAseq" )$Version +rVersion <- R.version.string +rPath <- R.home() +catMsg( c("QDNAseq version ", qdnaseqVersion) ) +catMsg( c( rVersion ) ) + +qdnaseqObject <- readRDS( rdsFilePath ) +sampleNames <- sampleNames( qdnaseqObject ) + +## sanity checks +elements <- assayDataElementNames(qdnaseqObject) +if ( ! "calls" %in% elements ) stop( "No calls present in object, regioning with CGHregions only work on called data" ) +if ( length(sampleNames) < 2 ) stop( "Object contains too few samples, regioning with CGHregions only works with at least two samples" ) + +## analysis +cgh <- makeCgh( qdnaseqObject, filter=TRUE ) +regions <- CGHregions( cgh, averror=0.00001 ) +outputData <- cbind( regions@featureData@data, regions@assayData$regions ) + +## output +write.table( outputData, outputFile, sep="\t", quote=FALSE, row.names=FALSE ) + +## tell galaxy all seems ok +q(status=0) diff -r 069289631381 -r f89205f51e27 QDNAseq-regioning.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-regioning.xml Mon Jul 06 05:41:08 2015 -0400 @@ -0,0 +1,118 @@ + + + + + + + + + + + Perform regioning on QDNAseq data + + + + QDNAseq-regioning.R + $cfg + + + QDNAseq-version.R + + + + + + + + + + + + + + + + + ^[^\s\\]*$ + + + + + + + + + + + + + + + + +## Desc: this file is sourced in QDNAseq-region.R wrapper script +## as means to pass all galaxy params to R + +"${output_name}" -> outputName +"${output_file}" -> outputFile +"${rds_file_path}" -> rdsFilePath +as.double( "${av_error}" ) -> avError + + + + + + + + + + + + + +**Introduction** + +This tool is a wrapper for the "CGHregions" function of the R Bioconductor package CGHregions_ and works on objects of the R bioconductor package QDNAseq_ + +----- + +**What it does** + +**Input:** The input for this tool is a QDNAseq R object in RDS (R data structure) format, an (optional) output file of the QDNAseq galaxy tool. **Output:** Running this regioning tool overlaps the called called segments from all samples present in the input object. The output contains the merged segments with their call values for each sample. + +----- + +**Output examples** + +*Example TSV output:* + +:: + + Chromosome Start End Nclone AveDist sample1 sample2 + 1 6000001 7000000 42 0 1 -1 + 1 7000001 8000000 11 0 0 -1 + +----- + +.. class:: warningmark + +As there is no R 3.1.0 galaxy-package yet (a requirement for QDNAseq) that works with all requirements, the **dependencies** need to be installed by hand and available to the user under which galaxy runs: R (>= 3.1.0) and bioconductor package QDNAseq (>= 1.2.2). In case the path to this R installation is not "R", also the wrapper xml must be updated to include the correct path during installation of this tool. + +----- + +**Citation** + +For the underlying QDNAseq R package please cite: +Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114. + +See also the bioconductor documentation of QDNAseq_ and CGHregions_ + +.. _CGHregions: http://http://www.bioconductor.org/packages/release/bioc/html/CGHregions.html +.. _QDNAseq: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html + +.. image:: LGG150_copynumber.png +.. image:: LGG150_copynumberSegmented.png + + + + diff -r 069289631381 -r f89205f51e27 QDNAseq-version.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/QDNAseq-version.R Mon Jul 06 05:41:08 2015 -0400 @@ -0,0 +1,1 @@ +cat( packageDescription("QDNAseq", fields = "Version") ) \ No newline at end of file diff -r 069289631381 -r f89205f51e27 QDNAseq.R --- a/QDNAseq.R Mon Jul 06 05:38:44 2015 -0400 +++ b/QDNAseq.R Mon Jul 06 05:41:08 2015 -0400 @@ -290,12 +290,12 @@ ## proceed with segmenting / calling if requested if ( doSegment ){ - copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD, transformFun="sqrt" ) + copyNumbersSegmented <- segmentBins( copyNumbersSmooth, undo.splits=undoSplits, undo.SD=undoSD, transformFun=c("sqrt") ) copyNumbersSegmented <- normalizeSegmentedBins( copyNumbersSegmented ) outputData <- copyNumbersSegmented outputType <- 'segments' - igvPath <- paste( outputPath, '/', rdsSegmName, sep='') - rdsPath <- paste( outputPath, '/', igvSegmName, sep='') + igvPath <- paste( outputPath, '/', igvSegmName, sep='') + rdsPath <- paste( outputPath, '/', rdsSegmName, sep='') } if ( doCall ){ copyNumbersCalled <- callBins(copyNumbersSegmented) diff -r 069289631381 -r f89205f51e27 QDNAseq.xml --- a/QDNAseq.xml Mon Jul 06 05:38:44 2015 -0400 +++ b/QDNAseq.xml Mon Jul 06 05:41:08 2015 -0400 @@ -19,6 +19,8 @@ \$QDNASEQ_PATH + QDNAseq-version.R + @@ -274,22 +276,22 @@ - + ( "bin1000kb" in binSizes and txt2history == 'TRUE') - + ("bin100kb" in binSizes and txt2history == 'TRUE') - + ("bin30kb" in binSizes and txt2history == 'TRUE') - + ("bin15kb" in binSizes and txt2history == 'TRUE') - + ("bin5kb" in binSizes and txt2history == 'TRUE') - + ("bin1kb" in binSizes and txt2history == 'TRUE') @@ -356,7 +358,7 @@ .. class:: warningmark -Requires **internet access** for downloading bin-annotations from bitbucket and to show some styling (css) of the final report +Some optional history input/output files are of format "rds" (file format to store a R object). This is not registered in galaxy by default, so has to be added to the available datatypes. ----- diff -r 069289631381 -r f89205f51e27 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Jul 06 05:41:08 2015 -0400 @@ -0,0 +1,35 @@ +QDNAseq Galaxy tool +==================== + +This galaxy-tool is a wrapper for the R Bioconductor package [QDNAseq] + +It determines the copy number state of human chromosomes 1 - 22 for (shallow coverage) whole genome sequencing data. + +For questions/remarks about the bioconductor package itself, see also the code at [github]. + +[QDNAseq]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html +[github]: https://github.com/ccagc/QDNAseq + + +Installation +--------------------- + +This tool should be installed into a galaxy installation from the (test)toolshed (tool-name: qdnaseq). + + +Sample workflow +--------------------- + +You can test this tool at a galaxy installation by using built-in data by selecting the option "Run with test data" and press execute. + + +Citation +--------------------- + +For the underlying QDNAseq R package please cite: +Scheinin I, Sie D, Bengtsson H, van de Wiel MA, Olshen AB, van Thuijl HF, van Essen HF, Eijk PP, Rustenburg F, Meijer GA, Reijneveld JC, Wesseling P, Pinkel D, Albertson DG and Ylstra B (2014). “DNA copy number analysis of fresh and formalin-fixed specimens by shallow whole-genome sequencing with identification and exclusion of problematic regions in the genome assembly.” Genome Research. doi:10.1101/gr.175141.114. + +See also the bioconductor package [documentation]. + +[documentation]: http://www.bioconductor.org/packages/release/bioc/html/QDNAseq.html +