changeset 0:ed24bacaa4d0 draft

planemo upload for repository https://github.com/blankenberglab/galaxy-tools-blankenberg/tree/master/tools/r-gpca commit ec2f585063522efa001e4d2a639e92a67758ad8c-dirty
author blankenberglab
date Mon, 19 Nov 2018 10:15:19 -0500
parents
children d93629e79f5e
files r-gpca.R r-gpca.xml test-data/batch_in_1.tabular test-data/data_in_1.tabular test-data/html_outfile_1/gDist.png test-data/html_outfile_1/guided_1v2.png test-data/html_outfile_1/guided_npcs.png test-data/html_outfile_1/guided_var.png test-data/html_outfile_1/html_outfile_1.html test-data/html_outfile_1/unguided_1v2.png test-data/html_outfile_1/unguided_npcs.png test-data/html_outfile_1/unguided_var.png test-data/numbers_outfile_1.tabular
diffstat 13 files changed, 330 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/r-gpca.R	Mon Nov 19 10:15:19 2018 -0500
@@ -0,0 +1,186 @@
+#!/usr/bin/env Rscript 
+library(optparse)
+library(gPCA)
+
+
+# Command-line options (optparse); parsed values are read later as opt$<long name>.
+option_list <- list(
+  make_option(
+    c("--version"),
+    action = "store_true",
+    default = FALSE,
+    help = "Print version and exit"
+  ),
+  make_option(
+    c("-i", "--batch"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "Input batch"
+  ),
+  make_option(
+    c("-d", "--data"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "Input data"
+  ),
+  make_option(
+    c("-n", "--nperm"),
+    action = "store",
+    default = 1000,
+    type = 'numeric',
+    help = "Number of permutations to perform"
+  ),
+  make_option(
+    c("-c", "--center"),
+    action = "store_true",
+    default = FALSE,
+    help = "Is the input batch and data centered [default %default]"
+  ),
+  make_option(
+    c("-y", "--scaleY"),
+    action = "store_true",
+    default = FALSE,
+    help = "Scale to Y axis"
+  ),
+  make_option(
+    c("-f", "--filt"),
+    action = "store",
+    default = NULL,  # NULL is forwarded unchanged as filt= to gPCA.batchdetect
+    type = 'numeric',
+    help = "Retain features"
+  ),
+  make_option(
+    c("--npcs"),
+    action = "store",
+    default = 3,
+    type = 'numeric',
+    help = "Number of principal components to plot"
+  ),
+  make_option(
+    c("-p", "--path"),
+    action = "store",
+    default = '$html_outfile.extra_files_path',  # literal placeholder text; the Galaxy wrapper passes -p explicitly
+    type = 'character',
+    help = "File path"
+  ),
+  make_option(
+    c("-s", "--seed"),
+    action = "store",
+    default = NULL,  # NULL is forwarded unchanged as seed= to gPCA.batchdetect
+    type = 'numeric',
+    help = "Set a seed number"
+  ),
+  make_option(
+    c("-x", "--numbers_outfile"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "Numbers output"
+  ),
+  make_option(
+    c("-o", "--html_outfile"),
+    action = "store",
+    default = NA,
+    type = 'character',
+    help = "Output"
+  )
+)
+
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# --version: print the installed gPCA package version and exit before reading any input
+if (opt$version) {
+  cat(paste("gPCA version", toString(packageVersion("gPCA"))), "\n")
+  quit()
+}
+
+# Check parameter values: both input files must be supplied and exist.
+# The options default to NA, and file.exists(NA) fails with "invalid 'file' argument", so test is.na() first.
+if (is.na(opt$batch) || !file.exists(opt$batch)) {
+  stop((paste('File for batch', opt$batch, 'does not exist')))
+}
+if (is.na(opt$data) || !file.exists(opt$data)) {
+  stop((paste('File for data', opt$data, 'does not exist')))
+}
+
+# Batch labels are turned into integer codes through their factor levels
+batch <- as.numeric(factor(as.matrix(read.delim(opt$batch, header = TRUE, row.names = 1, sep = "\t"))))
+# Transpose so that rows of `data` are the columns of the input file (the samples, in the bundled test data)
+data <- t(as.matrix(read.delim(opt$data, row.names = 1, header = TRUE, sep = "\t")))
+
+out <- gPCA.batchdetect(x = data, batch = batch, center = opt$center, scaleY = opt$scaleY, nperm = opt$nperm, filt = opt$filt, seed = opt$seed)
+# Overwrite varPCg1 with (varPCg1 - varPCu1) / varPCg1, expressed as a percentage
+out$varPCg1 <- ((out$varPCg1 - out$varPCu1) / out$varPCg1) * 100
+# Create the output directory quietly; it may already exist or have missing parents
+dir.create(opt$path, showWarnings = FALSE, recursive = TRUE)
+write.table(data.frame(out$delta, out$p.val, out$varPCg1), file = opt$numbers_outfile, sep = "\t", row.names = FALSE, quote = FALSE)
+
+# gDist plot of the gPCA result, saved as gDist.png inside opt$path
+par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)  # NOTE(review): runs before png() opens its device -- confirm these settings reach the PNG
+png(paste0(opt$path, '/gDist.png'), width = 1020, height = 800, units = 'px')
+gDist(out)
+invisible(dev.off())
+
+
+# Guided and unguided PCplot of type "1v2", one PNG each inside opt$path
+par(mai=c(0.8,0.8,0.1,0.1),cex=0.8)
+png(paste(opt$path,'/guided_1v2.png', sep=""), width=1020, height=800, units='px')
+PCplot(out,ug="guided",type="1v2")  # was ug="unguided", which made guided_1v2.png a copy of the unguided plot
+invisible(dev.off())
+# Unguided counterpart, written to unguided_1v2.png
+par(mai=c(0.8,0.8,0.1,0.1),cex=0.8)
+png(paste(opt$path,'/unguided_1v2.png', sep=""), width=1020, height=800, units='px')
+PCplot(out,ug="unguided",type="1v2")
+invisible(dev.off())
+
+
+# Guided and unguided PCplot of type "comp" with npcs = opt$npcs, one PNG each
+par(mai = c(0.65, 0.65, 0.1, 0.1), cex = 0.8)
+png(paste0(opt$path, '/guided_npcs.png'), width = 1020, height = 800, units = 'px')
+PCplot(out, ug = "guided", type = "comp", npcs = opt$npcs)
+invisible(dev.off())
+# Unguided counterpart, written to unguided_npcs.png
+par(mai = c(0.65, 0.65, 0.1, 0.1), cex = 0.8)
+png(paste0(opt$path, '/unguided_npcs.png'), width = 1020, height = 800, units = 'px')
+PCplot(out, ug = "unguided", type = "comp", npcs = opt$npcs)
+invisible(dev.off())
+
+
+# Guided and unguided CumulativeVarPlot (drawn in blue), one PNG each inside opt$path
+par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)
+png(paste0(opt$path, '/guided_var.png'), width = 1020, height = 800, units = 'px')
+CumulativeVarPlot(out, ug = "guided", col = "blue")
+invisible(dev.off())
+# Unguided counterpart, written to unguided_var.png
+par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)
+png(paste0(opt$path, '/unguided_var.png'), width = 1020, height = 800, units = 'px')
+CumulativeVarPlot(out, ug = "unguided", col = "blue")
+invisible(dev.off())
+
+# Build the HTML report (summary table plus the seven PNGs, referenced by relative name) and write it to opt$html_outfile
+html_page <- paste('<html>
+<table id="r-gpca-wrap" align="center" border="1">
+ <tr>
+  <th>Delta</th><th>P-value</th><th>varPCg1</th>
+ </tr>
+ <tr>
+  <td id=delta>',out$delta,'</td><td id=p.val>',out$p.val,'</td><td id=varPCg1>',out$varPCg1,'</td>
+ </tr>
+</table>
+
+<center><img src="gDist.png"/></center><br>
+<center><title>Guided 1v2</title><br><img src="guided_1v2.png"/></center><br>
+<center><title>Unguided 1v2</title><br><img src="unguided_1v2.png"/></center><br>
+
+<center><title>Guided Compare to ',opt$npcs,'</title><br><img src="guided_npcs.png"/></center><br>
+<center><title>Unguided Compare to ',opt$npcs,'</title><br><img src="unguided_npcs.png"/></center><br>
+
+<center><title>Guided Cumulative Variance</title><br><img src="guided_var.png"/></center><br>
+<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var.png"/></center><br>
+
+</html>')
+write(html_page, file = opt$html_outfile)
+# Print the R session information (a top-level value is auto-printed)
+sessionInfo()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/r-gpca.xml	Mon Nov 19 10:15:19 2018 -0500
@@ -0,0 +1,108 @@
+<tool id="gpca" name="gPCA" version="1.0.0">
+  <description>Batch Effect Detection via Guided Principal Components Analysis</description>
+    <requirements>
+        <requirement type="package" version="1.0">r-gpca</requirement>
+        <requirement type="package" version="1.6.0">r-optparse</requirement>
+    </requirements>
+    <version_command><![CDATA['$__tool_directory__/r-gpca.R' --version]]></version_command>
+    <command detect_errors="exit_code">
+    <![CDATA[
+      
+      Rscript '$__tool_directory__/r-gpca.R'
+        -i '${batch}'
+        -d '${data}'
+        -n '${nperm}'
+        ${center}
+        ${scaleY}
+        #if str($filt):
+            -f '${filt}'
+        #end if
+        #if str($seed):
+            -s '${seed}'
+        #end if
+        --npcs '${npcs}'
+        -p '${html_outfile.extra_files_path}'
+        -x '${numbers_outfile}'
+        -o '${html_outfile}';
+
+    ]]></command>
+    
+    <inputs>
+        <param type="data" name="data"  argument="--data" format="tabular"   label="Data input file" />
+        <param type="data" name="batch" argument="--batch" format="tabular" label="Batch input file" />
+        <param type="integer" name="nperm" argument="--nperm" value="1000" help="default=1000" label="Number of permutations to perform"/>
+        <param type="boolean" name="center" argument="--center" checked="false" truevalue="--center" falsevalue="" help="Set to true to prevent gPCA from centering" label="Is the data centered?"/>
+        <param type="boolean" name="scaleY" argument="--scaleY" checked="false" truevalue="--scaleY" falsevalue="" help="default=No" label="Should Y be scaled based on number of samples in each batch?"/>
+        <param type="integer" name="filt" optional="true" argument="--filt"  min="1" value="" label="How many features to keep?" help="Leave blank for default(NULL)"/>
+        <param type="integer" name="seed" optional="true" argument="--seed"  min="1" value="" label="Seed #" help="Leave blank for default(NULL)"/>
+        <param type="integer" name="npcs" optional="false" argument="--npcs"  min="3" value="3" label="Principal Components to plot" help="Number of principal components to plot"/>
+    </inputs>
+
+    <outputs>
+        <data name="html_outfile" format="html" label="${tool.name} on ${on_string} html"/>
+        <data name="numbers_outfile" format="tabular" label="${tool.name} on ${on_string}: values" />
+    </outputs>
+
+    <tests>
+      <test>
+        <param name="batch" ftype="tabular" value="batch_in_1.tabular"  />
+        <param name="data" ftype="tabular" value="data_in_1.tabular"  />
+        <param name="nperm" value="1000" />
+        <param name="center" value="False" />
+        <param name="scaleY" value="False" />
+        <param name="filt" value="" />
+        <param name="npcs" value="3" />
+        <param name="seed" value="123456" />
+        <output name="html_outfile" ftype="html" file="html_outfile_1/html_outfile_1.html">
+          <extra_files type="file" name="gDist.png" value="html_outfile_1/gDist.png" compare="sim_size"/>
+          <extra_files type="file" name="guided_1v2.png" value="html_outfile_1/guided_1v2.png" compare="sim_size"/>
+          <extra_files type="file" name="unguided_1v2.png" value="html_outfile_1/unguided_1v2.png" compare="sim_size"/>
+          <extra_files type="file" name="guided_npcs.png" value="html_outfile_1/guided_npcs.png" compare="sim_size"/>
+          <extra_files type="file" name="unguided_npcs.png" value="html_outfile_1/unguided_npcs.png" compare="sim_size"/>
+          <extra_files type="file" name="guided_var.png" value="html_outfile_1/guided_var.png" compare="sim_size"/>
+          <extra_files type="file" name="unguided_var.png" value="html_outfile_1/unguided_var.png" compare="sim_size"/>
+        </output>
+        <output name="numbers_outfile" ftype="tabular" file="numbers_outfile_1.tabular"/>
+      </test>
+    </tests>
+    
+    <help>
+      <![CDATA[
+
+      **What it does**
+      Utilizes guided principal components analysis for the detection of batch effects in high-throughput data.
+
+      Arguments:
+
+      batch: a length n vector that indicates batch (group or class) for each observation.
+
+      x/data:  an nxp matrix of data where n denotes observations and p denotes the number of features (e.g. probe, gene, SNP, etc.).
+
+      filt:  (optional) the number of features to retain after applying a variance filter. If NULL,  no  filter  is  applied. Filtering  can  significantly  reduce  the  processing time in the case of very large data sets.
+
+      nperm: the number of permutations to perform for the permutation test, default is 1000.
+
+      center:  (logical) Is your data x centered? If not, then center=FALSE and gPCA.batchdetect will center it for you.
+
+      scaleY:  (logical) Do you want to scale the Y matrix by the number of samples in each batch? If not, then scaleY=FALSE (default), otherwise, scaleY=TRUE.
+
+      seed:  the seed number for set.seed(). Default is NULL.
+
+      Literature:
+
+      https://cran.r-project.org/web/packages/gPCA/gPCA.pdf
+
+      https://cran.r-project.org/web/packages/gPCA/vignettes/gPCA.pdf
+      ]]>
+    </help>
+    
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/btt480</citation>
+    <citation type="bibtex">@unpublished{gpca:2018,
+      title  = "gPCA Galaxy Tool",
+      author = "Chris Lowe, Daniel Blankenberg",
+      url    = "https://github.com/BlankenbergLab/galaxy-tools-blankenberg/tree/master/tools/r-gpca",
+      year   = "2018 (accessed November 16, 2018)"}
+    </citation>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/batch_in_1.tabular	Mon Nov 19 10:15:19 2018 -0500
@@ -0,0 +1,2 @@
+#batch	sample_1	sample_2	sample_3	sample_4	sample_5	sample_6	sample_7	sample_8	sample_9	sample_10
+time_period	batch_1	batch_3	batch_3	batch_3	batch_2	batch_2	batch_1	batch_2	batch_3	batch_3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/data_in_1.tabular	Mon Nov 19 10:15:19 2018 -0500
@@ -0,0 +1,11 @@
+#feature	sample_1	sample_2	sample_3	sample_4	sample_5	sample_6	sample_7	sample_8	sample_9	sample_10
+feature_1	1.74952393913539	1.6739529249317	-2.71641723445615	0.895354674014103	-1.18383993129495	-1.75127636793225	-2.31029339131191	0.793766087160378	1.3935397883822	-3.96218563865692
+feature_2	1.16893865919845	-3.51406256465814	0.455090475971535	-2.13304127796526	-2.62874555718239	1.80409061129936	-2.17682319950269	2.6241768514842	1.2742993822058	-3.42407446944136
+feature_3	2.39896813272329	-3.35153914200262	2.58177487886913	-4.47154846386962	-4.60481257982491	0.724429447466709	-2.72040689316203	1.5526264468751	2.27411893974989	-3.10174037010434
+feature_4	0.769706277557876	-2.60297853853694	2.26504504946117	-3.73465862148677	-2.09259003217505	1.70331395464047	-1.99263254105004	1.17192052086955	2.23707867457778	-2.894282863192
+feature_5	2.31411568136784	-3.75995659147112	1.64923584729505	-4.42186433595126	-2.00336067137353	2.97712035244198	-4.97418199048249	1.51211587005453	2.42293761014066	-2.73075288666884
+feature_6	2.21891525583109	-2.4436233887657	1.05438929743106	-3.46364414372677	-2.44604147468078	1.16087584944093	-2.52395292464374	0.702809594333191	1.60908123060767	-3.33643910453829
+feature_7	1.80728541774419	-2.85772357185281	0.920121600876659	-2.51188018410717	-2.69228640760197	1.95720836059593	-2.80768246809752	0.369114413868452	1.60433720239336	-1.12904146856823
+feature_8	2.04087850933637	-3.39761432466996	0.78752941468568	-3.4531969900781	-3.49624377726172	1.76272395685181	-3.13939004852551	2.79966721181393	1.97976571266901	-3.80633309732772
+feature_9	-3.41445914612496	2.18268228473137	-2.31074303702276	2.58059964125049	-2.56482429357585	-1.71273401701983	2.16230886925131	1.69099778494149	2.47397641001459	-3.13160239840691
+feature_10	-2.4801501229036	0.277265548866568	-2.72716618514931	1.6517716429499	-2.08301939352716	-1.88620555973345	1.58528847585932	1.61929447444201	1.43353547610189	-2.6799919776754
Binary file test-data/html_outfile_1/gDist.png has changed
Binary file test-data/html_outfile_1/guided_1v2.png has changed
Binary file test-data/html_outfile_1/guided_npcs.png has changed
Binary file test-data/html_outfile_1/guided_var.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/html_outfile_1/html_outfile_1.html	Mon Nov 19 10:15:19 2018 -0500
@@ -0,0 +1,21 @@
+<html>
+<table id="r-gpca-wrap" align="center" border="1">
+ <tr>
+  <th>Delta</th><th>P-value</th><th>varPCg1</th>
+ </tr>
+ <tr>
+  <td id=delta> 0.824447005030577 </td><td id=p.val> 0.512 </td><td id=varPCg1> 20.6295699072817 </td>
+ </tr>
+</table>
+
+<center><img src="gDist.png"/></center><br>
+<center><title>Guided 1v2</title><br><img src="guided_1v2.png"/></center><br>
+<center><title>Unguided 1v2</title><br><img src="unguided_1v2.png"/></center><br>
+
+<center><title>Guided Compare to  3 </title><br><img src="guided_npcs.png"/></center><br>
+<center><title>Unguided Compare to  3 </title><br><img src="unguided_npcs.png"/></center><br>
+
+<center><title>Guided Cumulative Variance</title><br><img src="guided_var.png"/></center><br>
+<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var.png"/></center><br>
+
+</html>
Binary file test-data/html_outfile_1/unguided_1v2.png has changed
Binary file test-data/html_outfile_1/unguided_npcs.png has changed
Binary file test-data/html_outfile_1/unguided_var.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/numbers_outfile_1.tabular	Mon Nov 19 10:15:19 2018 -0500
@@ -0,0 +1,2 @@
+out.delta	out.p.val	out.varPCg1
+0.824447005030577	0.512	20.6295699072817