changeset 0:3ea49d2fa85f draft default tip

Uploaded
author ric
date Fri, 07 Oct 2016 05:08:11 -0400
parents
children
files test/cnmops.R test/cnmops.xml
diffstat 2 files changed, 247 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/cnmops.R	Fri Oct 07 05:08:11 2016 -0400
@@ -0,0 +1,96 @@
+# Load ExomeDepth library (without warnings)
+suppressMessages(library(cn.mops))
+
+# Import parameters from xml wrapper (args_file)
+args  <- commandArgs(trailingOnly=TRUE)
+param <- read.table(args[1],sep="=", as.is=TRUE)
+
+# Set common parameters
+target       <- read.table(param[match("target",param[,1]),2], sep="\t", as.is=TRUE)
+padding      <- as.numeric(param[match("padding",param[,1]),2])
+mapping_mode <- param[match("mapping_mode",param[,1]),2]
+output       <- param[match("output",param[,1]),2]
+
+# Set advanced parameters
+advanced_mode <- ifelse("advanced_mode" %in% param[,1], TRUE, FALSE)
+if (advanced_mode){
+	prior_impact    <- as.integer(param[match("prior_impact",param[,1]),2])
+	cyc             <- as.integer(param[match("cyc",param[,1]),2])
+	norm_type       <- param[match("norm_type",param[,1]),2]
+	norm            <- as.logical(param[match("norm",param[,1]),2])
+	upper_threshold <- as.numeric(param[match("upper_threshold",param[,1]),2])
+	lower_threshold <- as.numeric(param[match("lower_threshold",param[,1]),2])
+	min_width       <- as.integer(param[match("min_width",param[,1]),2])
+	seq_alg         <- param[match("seq_alg",param[,1]),2]
+	min_read_count  <- as.integer(param[match("min_read_count",param[,1]),2])
+	use_median      <- as.logical(param[match("use_median",param[,1]),2])
+}
+
+# Create symbolic links for multiple bam and bai 
+bam         <- param[param[,1]=="bam",2]
+bam_bai     <- param[param[,1]=="bam_bai",2]
+bam_label   <- param[param[,1]=="bam_label",2]
+bam_label   <- gsub(" ", "_", bam_label)
+
+for(i in 1:length(bam)){
+  stopifnot(file.symlink(bam[i], paste(bam_label[i], "bam", sep=".")))
+  stopifnot(file.symlink(bam_bai[i], paste(bam_label[i], "bam.bai", sep=".")))
+}
+
+# Create genomic ranges
+gr  <- GRanges(target[,1],IRanges(target[,2]-padding,target[,3]+padding))
+# Merge overlapping segments (make sense if padding != 0)
+gr <- reduce(gr)
+
+
+# Get read counts
+BAMFiles <- paste(bam_label, "bam", sep=".")
+X <- suppressMessages(getSegmentReadCountsFromBAM(BAMFiles,GR=gr,
+												  mode=mapping_mode,
+												  sampleNames=bam_label))
+
+if (advanced_mode){
+	resCNMOPS <- suppressMessages(exomecn.mops(X,
+									priorImpact=prior_impact,
+									cyc=cyc,
+									normType=norm_type,
+									norm=norm,
+									upperThreshold=upper_threshold,
+									lowerThreshold=lower_threshold,
+									minWidth=min_width,
+									seqAlgorithm=seq_alg,
+									minReadCount=min_read_count,
+									useMedian=use_median))
+}else{
+	resCNMOPS <- suppressMessages(exomecn.mops(X))
+}
+
+resCNMOPS <- calcIntegerCopyNumbers(resCNMOPS)
+
+# Extract individual CNV calls
+# Legend for CN values is as follows:
+# The copy number classes default to CN0, CN1, CN2, CN3, .., CN8.
+# CN2 is the normal copy number for diploid samples.
+# CN1 is a heterozygous deletion and 
+# CN0 is a homozygous deletion.
+# CN3 thru CN8 are amplifications.
+# For non-tumor samples the highest copy number class is 8 - higher copy numbers have not been reported.
+# CN8 is expected to have 4 times as many reads (for times as high coverage) as CN2.
+# For tumor samples very high copy numbers have been observed (e.g. CN64),
+# therefore the parameters of cn.mops have to be adjusted to allow for high copy numbers.
+# A way to set the parameters is given in https://gist.github.com/gklambauer/8955203
+res <- cnvs(resCNMOPS)
+
+# Convert results to data.frame
+df <- data.frame(chr=as.character(seqnames(res)),
+  				 starts=start(res),
+  				 #ranges(res),
+  				 ends=end(res),
+  				 as.data.frame(elementMetadata(res)))
+
+# Remove CN2 (= normal copy number), if any
+df <- df[df$CN != "CN2",]
+
+# Write results
+write.table(df, sep='\t', quote=FALSE, file = output,
+            row.names = FALSE, dec=".")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/cnmops.xml	Fri Oct 07 05:08:11 2016 -0400
@@ -0,0 +1,151 @@
+<tool id="cnmops" name="cn.mops" version="1.0.0">
+  <description>cnv caller</description>
+  <requirements>
+        <requirement type="package" version="1.16.2">bioconductor-cn.mops</requirement>
+    </requirements>
+  <command interpreter="Rscript">
+   cnmops.R $args_file
+  </command>
+  <configfiles>
+    <configfile name="args_file">target=$targetFile
+padding=$padding
+mapping_mode=$mapping_mode
+#for $i in $inputs
+bam=${i.input}
+bam_bai=${i.input.metadata.bam_index}
+#if str($i.label.value) != "":
+bam_label=${$i.label.value}
+#else
+bam_label=${i.input.dataset.name}
+#end if
+#end for
+output=$output
+#if str($advanced_options.advanced_options_select) == "yes"
+advanced_mode=TRUE
+$advanced_options.prior_impact=$prior_impact
+$advanced_options.cyc=$cyc
+$advanced_options.norm=$norm
+$advanced_options.norm_type=$norm_type
+$advanced_options.upper_threshold=$upper_threshold
+$advanced_options.lower_threshold=$lower_threshold
+$advanced_options.min_width=$min_width
+$advanced_options.seq_alg=$seq_alg
+$advanced_options.min_read_count=$min_read_count
+$advanced_options.use_median=$use_median
+#end if
+</configfile>
+  </configfiles>
+  <inputs>
+    <param format="bed" name="targetFile" type="data" label="Target regions (BED)">
+      <validator type="unspecified_build" />
+    </param>
+    <param name="padding" type="integer" value="100" label="Padding" help="Amount of padding (in bp) to add around each target region" />
+    <param name="mapping_mode" type="select" label="Mapping mode" help="Select whether the mapping algorithm was run using paired or unpaired reads">
+        <option value="paired" selected="true">paired</option>
+        <option value="unpaired">unpaired</option>
+    </param>
+    <repeat name="inputs" title="BAM" min="2" help="Need to add more files? Use controls below.">
+      <param format="bam" name="input" type="data" label="BAM file">
+        <options>
+          <filter type="data_meta" ref="targetFile" key="dbkey"/>
+        </options>
+      </param>
+      <param name="label" type="text" size="30" value="" label="Label" help="Label to use in the output. If not given, the dataset name will be used instead">
+        <validator type="regex" message="Spaces are not allowed">^\S*$</validator>
+      </param>
+    </repeat>
+    
+    <!-- Advanced options -->
+    <conditional name="advanced_options">
+      <param name="advanced_options_select" type="select" label="Show advanced options">
+        <option value="yes">Yes</option>
+        <option value="no" selected="true">No</option>
+      </param>
+      <when value="yes">
+        <param name="prior_impact" type="integer" value="10" label="Prior impact" help="Positive real value that reflects how strong the prior assumption affects the result. The higher the value the more samples will be assumed to have copy number 2." />
+        <param name="cyc" type="integer" value="20" label="Cycles" help="Positive integer that sets the number of cycles for the algorithm. Usually after less than 15 cycles convergence is reached." />
+        <param name="norm" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Apply normalization" help="" />
+        <param name="norm_type" type="select" label="Normalization type" help="Mode of the normalization technique. Read counts will be scaled sample-wise.">
+          <option value="poisson" selected="true">poisson</option>
+          <option value="mean">mean</option>
+          <option value="min">min</option>
+          <option value="median">median</option>
+          <option value="quant">quant</option>
+          <option value="mode">mode</option>
+        </param>
+        <param name="upper_threshold" type="float" value="0.55" label="Cut-off for copy number gains" help="All CNV calling values above this value will be called as gain. The value should be set close to the log2 of the expected foldchange for copy number 3 or 4." />
+        <param name="lower_threshold" type="float" value="-0.8" label="Cut-off for copy number losses" help="All CNV calling values below this value will be called as loss. The value should be set close to the log2 of the expected foldchange for copy number 1 or 0." />
+        <param name="min_width" type="integer" value="5" label="Minimum width" help="Minimum number of segments a CNV should span." />
+        <param name="seq_alg" type="select" label="Segmentation algorithm" help="">
+          <option value="fast" selected="false">fast</option>
+          <option value="DNAcopy">DNAcopy</option>
+        </param>
+        <param name="min_read_count" type="integer" value="1" label="Minimum read count" help="If all samples are below this value the algorithm will return the prior knowledge. This prevents the algorithm from being applied to segments with very low coverage." />
+        <param name="use_median" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Use median" help="Whether median instead of mean of a target region should be used for the CNV call." />
+    </when>
+      <when value="no" />
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="bed" name="output" label="${tool.name} on ${on_string}" />
+  </outputs>
+  <help>
+
+**What it does**
+
+This tool uses cn.mops (Copy Number estimation by a Mixture Of PoissonS) to call copy number variations
+and aberrations (CNVs and CNAs) from targeted next generation sequencing (NGS) data.
+
+**Output format**
+
+========== ========================
+Column     Description
+---------- ------------------------
+chr        Chromosome
+starts     Start of CNV region
+ends       End of CNV region
+sampleName Name of the sample with CNV
+median     Median value of CNV
+mean       Mean value of CNV
+CN         Copy number class (see below)
+========== ========================
+
+Copy number classes are identified as follows::
+
+ CN2: normal copy number for diploid samples
+ CN1: heterozygous deletion 
+ CN0: homozygous deletion 
+ CN3 - CN8: amplifications
+ 
+For non-tumor samples the highest copy number class is 8 - higher copy numbers have not been reported.
+CN8 is expected to have 4 times as many reads (for times as high coverage) as CN2.
+For tumor samples very high copy numbers have been observed (e.g. CN64),
+therefore the parameters of cn.mops have to be adjusted to allow for high copy numbers.
+A way to set the parameters is given in https://gist.github.com/gklambauer/8955203
+
+**License and citation**
+
+This Galaxy tool is Copyright © 2015 `CRS4 Srl.`_ and is released under the `MIT license`_.
+
+.. _CRS4 Srl.: http://www.crs4.it/
+.. _MIT license: http://opensource.org/licenses/MIT
+
+You can use this tool only if you agree to the license terms of: `cn.mops`_.
+
+.. _cn.mops: http://bioconductor.org/packages/release/bioc/html/cn.mops.html
+
+If you use this tool, please cite:
+
+- |Cuccuru2014|_
+- |Klambauer2012|_.
+
+.. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
+.. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
+.. |Klambauer2012| replace:: Klambauer, G., *et al.* (2012) cn.MOPS: Mixture of Poissons for Discovering Copy Number Variations in Next Generation Sequencing Data with a Low False Discovery Rate. *Nucleic Acids Research* 40, e69
+.. _Klambauer2012: http://http://nar.oxfordjournals.org/content/40/9/e69
+  </help>
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/btu135</citation>
+    <citation type="doi">10.1093/nar/gks003</citation>
+  </citations>
+</tool>