Mercurial > repos > blankenberglab > gpca
changeset 0:ed24bacaa4d0 draft
planemo upload for repository https://github.com/blankenberglab/galaxy-tools-blankenberg/tree/master/tools/r-gpca commit ec2f585063522efa001e4d2a639e92a67758ad8c-dirty
| author | blankenberglab |
|---|---|
| date | Mon, 19 Nov 2018 10:15:19 -0500 |
| parents | |
| children | d93629e79f5e |
| files | r-gpca.R r-gpca.xml test-data/batch_in_1.tabular test-data/data_in_1.tabular test-data/html_outfile_1/gDist.png test-data/html_outfile_1/guided_1v2.png test-data/html_outfile_1/guided_npcs.png test-data/html_outfile_1/guided_var.png test-data/html_outfile_1/html_outfile_1.html test-data/html_outfile_1/unguided_1v2.png test-data/html_outfile_1/unguided_npcs.png test-data/html_outfile_1/unguided_var.png test-data/numbers_outfile_1.tabular |
| diffstat | 13 files changed, 330 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env Rscript
# r-gpca.R -- command-line wrapper around the gPCA package.
#
# Reads a batch file and a data file (both tab-separated, first column used
# as row names), runs gPCA.batchdetect() and writes:
#   * a one-row table with delta, p-value and varPCg1 (--numbers_outfile),
#   * seven PNG plots into the directory given by --path,
#   * an HTML report referencing those plots (--html_outfile).
library(optparse)
library(gPCA)


# parse options
option_list <- list(
  make_option(
    c("--version"),
    action = "store_true",
    default = FALSE,
    help = "Print version and exit"
  ),
  make_option(
    c("-i", "--batch"),
    action = "store",
    default = NA,
    type = "character",
    help = "Input batch"
  ),
  make_option(
    c("-d", "--data"),
    action = "store",
    default = NA,
    type = "character",
    help = "Input data"
  ),
  make_option(
    c("-n", "--nperm"),
    action = "store",
    default = 1000,
    type = "numeric",
    help = "Number of permutations to perform"
  ),
  make_option(
    c("-c", "--center"),
    action = "store_true",
    default = FALSE,
    help = "Is the input batch and data centered [default %default]"
  ),
  make_option(
    c("-y", "--scaleY"),
    action = "store_true",
    default = FALSE,
    help = "Scale the Y matrix by the number of samples in each batch"
  ),
  make_option(
    c("-f", "--filt"),
    action = "store",
    default = NULL,
    type = "numeric",
    help = "Retain features"
  ),
  make_option(
    c("--npcs"),
    action = "store",
    default = 3,
    type = "numeric",
    help = "Number of principal components to plot"
  ),
  make_option(
    c("-p", "--path"),
    action = "store",
    # NOTE(review): this default is a literal, unexpanded template
    # placeholder; callers are expected to always pass -p explicitly.
    default = "$html_outfile.extra_files_path",
    type = "character",
    help = "File path"
  ),
  make_option(
    c("-s", "--seed"),
    action = "store",
    default = NULL,
    type = "numeric",
    help = "Set a seed number"
  ),
  make_option(
    c("-x", "--numbers_outfile"),
    action = "store",
    default = NA,
    type = "character",
    help = "Numbers output"
  ),
  make_option(
    c("-o", "--html_outfile"),
    action = "store",
    default = NA,
    type = "character",
    help = "Output"
  )
)

opt <- parse_args(OptionParser(option_list = option_list))

if (opt$version) {
  # print version and exit
  cat(paste("gPCA version", toString(packageVersion("gPCA"))), "\n")
  quit()
}

# Check parameter values

# Stop with a readable message when a required option was not supplied.
require_option <- function(value, label) {
  if (is.null(value) || is.na(value)) {
    stop(paste("No value given for", label), call. = FALSE)
  }
  invisible(value)
}

# Stop unless `path` was supplied and names an existing file.
require_input_file <- function(path, label) {
  require_option(path, label)
  if (!file.exists(path)) {
    stop(paste("File for", label, path, "does not exist"), call. = FALSE)
  }
  invisible(path)
}

require_input_file(opt$batch, "batch")
require_input_file(opt$data, "data")
require_option(opt$numbers_outfile, "numbers_outfile")
require_option(opt$html_outfile, "html_outfile")

# Batch labels are flattened to a vector and recoded as integer codes.
batch <- as.numeric(factor(as.matrix(
  read.delim(opt$batch, header = TRUE, row.names = 1, sep = "\t")
)))

# The input file is features x samples; transpose to samples x features.
data <- t(as.matrix(
  read.delim(opt$data, row.names = 1, header = TRUE, sep = "\t")
))

if (length(batch) != nrow(data)) {
  stop(
    paste(
      "Batch file describes", length(batch), "samples but data file has",
      nrow(data)
    ),
    call. = FALSE
  )
}

out <- gPCA.batchdetect(
  x = data,
  batch = batch,
  center = opt$center,
  scaleY = opt$scaleY,
  nperm = opt$nperm,
  filt = opt$filt,
  seed = opt$seed
)

# Re-express varPCg1 as the percent difference between guided and unguided
# first-component variance; the report and table below use this value.
out$varPCg1 <- ((out$varPCg1 - out$varPCu1) / out$varPCg1) * 100

# Tolerate an already existing directory and create missing parents.
dir.create(opt$path, recursive = TRUE, showWarnings = FALSE)

# The data.frame() call is kept in this exact form: the column names of the
# written table are derived from these expressions.
write.table(
  data.frame(out$delta, out$p.val, out$varPCg1),
  file = opt$numbers_outfile,
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)

# Open a 1020x800 px PNG device for `file_name` inside the output directory.
# par() must be called *after* this, because graphical parameters belong to
# the currently open device.
open_png <- function(file_name) {
  png(file.path(opt$path, file_name), width = 1020, height = 800, units = "px")
  invisible(NULL)
}

# General Distribution
open_png("gDist.png")
par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)
gDist(out)
invisible(dev.off())


# Guided/Unguided 1v2
open_png("guided_1v2.png")
par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)
PCplot(out, ug = "guided", type = "1v2")
invisible(dev.off())

open_png("unguided_1v2.png")
par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)
PCplot(out, ug = "unguided", type = "1v2")
invisible(dev.off())


# Guided/Unguided comp, npcs components
open_png("guided_npcs.png")
par(mai = c(0.65, 0.65, 0.1, 0.1), cex = 0.8)
PCplot(out, ug = "guided", type = "comp", npcs = opt$npcs)
invisible(dev.off())

open_png("unguided_npcs.png")
par(mai = c(0.65, 0.65, 0.1, 0.1), cex = 0.8)
PCplot(out, ug = "unguided", type = "comp", npcs = opt$npcs)
invisible(dev.off())


# Guided/Unguided CumulativeVarPlot
open_png("guided_var.png")
par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)
CumulativeVarPlot(out, ug = "guided", col = "blue")
invisible(dev.off())

open_png("unguided_var.png")
par(mai = c(0.8, 0.8, 0.1, 0.1), cex = 0.8)
CumulativeVarPlot(out, ug = "unguided", col = "blue")
invisible(dev.off())


# HTML report. paste() with its default separator is intentional: it puts a
# space on each side of every interpolated value.
# NOTE(review): whitespace inside the template is emitted verbatim -- confirm
# the indentation against the expected test output before committing.
write(paste('<html>
<table id="r-gpca-wrap" align="center" border="1">
  <tr>
    <th>Delta</th><th>P-value</th><th>varPCg1</th>
  </tr>
  <tr>
    <td id=delta>',out$delta,'</td><td id=p.val>',out$p.val,'</td><td id=varPCg1>',out$varPCg1,'</td>
  </tr>
</table>

<center><img src="gDist.png"/></center><br>
<center><title>Guided 1v2</title><br><img src="guided_1v2.png"/></center><br>
<center><title>Unguided 1v2</title><br><img src="unguided_1v2.png"/></center><br>

<center><title>Guided Compare to ',opt$npcs,'</title><br><img src="guided_npcs.png"/></center><br>
<center><title>Unguided Compare to ',opt$npcs,'</title><br><img src="unguided_npcs.png"/></center><br>

<center><title>Guided Cumulative Variance</title><br><img src="guided_var.png"/></center><br>
<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var.png"/></center><br>

</html>'
), file = opt$html_outfile)

sessionInfo()
<!-- r-gpca.xml: Galaxy tool definition that runs r-gpca.R (gPCA batch effect detection). -->
<tool id="gpca" name="gPCA" version="1.0.0">
    <description>Batch Effect Detection via Guided Principal Components Analysis</description>
    <requirements>
        <requirement type="package" version="1.0">r-gpca</requirement>
        <requirement type="package" version="1.6.0">r-optparse</requirement>
    </requirements>
    <!-- Invoked through Rscript, like the main command, so the script does not need the executable bit. -->
    <version_command><![CDATA[Rscript '$__tool_directory__/r-gpca.R' --version]]></version_command>
    <command detect_errors="exit_code">
    <![CDATA[

        Rscript '$__tool_directory__/r-gpca.R'
        -i '${batch}'
        -d '${data}'
        -n '${nperm}'
        ${center}
        ${scaleY}
        #if str($filt):
            -f '${filt}'
        #end if
        #if str($seed):
            -s '${seed}'
        #end if
        --npcs '${npcs}'
        -p '${html_outfile.extra_files_path}'
        -x '${numbers_outfile}'
        -o '${html_outfile}';

    ]]></command>

    <inputs>
        <param type="data" name="data" argument="--data" format="tabular" label="Data input file" />
        <param type="data" name="batch" argument="--batch" format="tabular" label="Batch input file" />
        <param type="integer" name="nperm" argument="--nperm" value="1000" help="default=1000" label="Number of permutations to perform"/>
        <!-- The two booleans expand to the bare flag when checked and to nothing otherwise. -->
        <param type="boolean" name="center" argument="--center" checked="false" truevalue="--center" falsevalue="" help="Set to true to prevent gPCA from centering" label="Is the data centered?"/>
        <param type="boolean" name="scaleY" argument="--scaleY" checked="false" truevalue="--scaleY" falsevalue="" help="default=No" label="Should Y be scaled based on number of samples in each batch?"/>
        <!-- filt and seed are optional; when left blank the flag is omitted from the command line. -->
        <param type="integer" name="filt" optional="true" argument="--filt" min="1" value="" label="How many features to keep?" help="Leave blank for default(NULL)"/>
        <param type="integer" name="seed" optional="true" argument="--seed" min="1" value="" label="Seed #" help="Leave blank for default(NULL)"/>
        <param type="integer" name="npcs" optional="false" argument="--npcs" min="3" value="3" label="Principal Components to plot" help="Number of principal components to plot"/>
    </inputs>

    <outputs>
        <data name="html_outfile" format="html" label="${tool.name} on ${on_string} html"/>
        <data name="numbers_outfile" format="tabular" label="${tool.name} on ${on_string}: values" />
    </outputs>

    <tests>
        <test>
            <param name="batch" ftype="tabular" value="batch_in_1.tabular" />
            <param name="data" ftype="tabular" value="data_in_1.tabular" />
            <param name="nperm" value="1000" />
            <param name="center" value="False" />
            <param name="scaleY" value="False" />
            <param name="filt" value="" />
            <param name="npcs" value="3" />
            <param name="seed" value="123456" />
            <output name="html_outfile" ftype="html" file="html_outfile_1/html_outfile_1.html">
                <extra_files type="file" name="gDist.png" value="html_outfile_1/gDist.png" compare="sim_size"/>
                <extra_files type="file" name="guided_1v2.png" value="html_outfile_1/guided_1v2.png" compare="sim_size"/>
                <extra_files type="file" name="unguided_1v2.png" value="html_outfile_1/unguided_1v2.png" compare="sim_size"/>
                <extra_files type="file" name="guided_npcs.png" value="html_outfile_1/guided_npcs.png" compare="sim_size"/>
                <extra_files type="file" name="unguided_npcs.png" value="html_outfile_1/unguided_npcs.png" compare="sim_size"/>
                <extra_files type="file" name="guided_var.png" value="html_outfile_1/guided_var.png" compare="sim_size"/>
                <extra_files type="file" name="unguided_var.png" value="html_outfile_1/unguided_var.png" compare="sim_size"/>
            </output>
            <output name="numbers_outfile" ftype="tabular" file="numbers_outfile_1.tabular"/>
        </test>
    </tests>

    <help>
    <![CDATA[

**What it does**
Utilizes guided principal components analysis for the detection of batch effects in high-throughput data.

Arguments:

batch: a length n vector that indicates batch (group or class) for each observation.

x/data: an nxp matrix of data where n denotes observations and p denotes the number of features (e.g. probe, gene, SNP, etc.).

filt: (optional) the number of features to retain after applying a variance filter. If NULL, no filter is applied. Filtering can significantly reduce the processing time in the case of very large data sets.

nperm: the number of permutations to perform for the permutation test, default is 1000.

center: (logical) Is your data x centered? If not, then center=FALSE and gPCA.batchdetect will center it for you.

scaleY: (logical) Do you want to scale the Y matrix by the number of samples in each batch? If not, then scaleY=FALSE (default), otherwise, scaleY=TRUE.

seed: the seed number for set.seed(). Default is NULL.

Literature:

https://cran.r-project.org/web/packages/gPCA/gPCA.pdf

https://cran.r-project.org/web/packages/gPCA/vignettes/gPCA.pdf
    ]]>
    </help>

    <citations>
        <citation type="doi">10.1093/bioinformatics/btt480</citation>
        <citation type="bibtex">@unpublished{gpca:2018,
            title = "gPCA Galaxy Tool",
            author = "Chris Lowe and Daniel Blankenberg",
            url = "https://github.com/BlankenbergLab/galaxy-tools-blankenberg/tree/master/tools/r-gpca",
            year = "2018 (accessed November 16, 2018)"}
        </citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/batch_in_1.tabular Mon Nov 19 10:15:19 2018 -0500 @@ -0,0 +1,2 @@ +#batch sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 sample_7 sample_8 sample_9 sample_10 +time_period batch_1 batch_3 batch_3 batch_3 batch_2 batch_2 batch_1 batch_2 batch_3 batch_3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data_in_1.tabular Mon Nov 19 10:15:19 2018 -0500 @@ -0,0 +1,11 @@ +#feature sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 sample_7 sample_8 sample_9 sample_10 +feature_1 1.74952393913539 1.6739529249317 -2.71641723445615 0.895354674014103 -1.18383993129495 -1.75127636793225 -2.31029339131191 0.793766087160378 1.3935397883822 -3.96218563865692 +feature_2 1.16893865919845 -3.51406256465814 0.455090475971535 -2.13304127796526 -2.62874555718239 1.80409061129936 -2.17682319950269 2.6241768514842 1.2742993822058 -3.42407446944136 +feature_3 2.39896813272329 -3.35153914200262 2.58177487886913 -4.47154846386962 -4.60481257982491 0.724429447466709 -2.72040689316203 1.5526264468751 2.27411893974989 -3.10174037010434 +feature_4 0.769706277557876 -2.60297853853694 2.26504504946117 -3.73465862148677 -2.09259003217505 1.70331395464047 -1.99263254105004 1.17192052086955 2.23707867457778 -2.894282863192 +feature_5 2.31411568136784 -3.75995659147112 1.64923584729505 -4.42186433595126 -2.00336067137353 2.97712035244198 -4.97418199048249 1.51211587005453 2.42293761014066 -2.73075288666884 +feature_6 2.21891525583109 -2.4436233887657 1.05438929743106 -3.46364414372677 -2.44604147468078 1.16087584944093 -2.52395292464374 0.702809594333191 1.60908123060767 -3.33643910453829 +feature_7 1.80728541774419 -2.85772357185281 0.920121600876659 -2.51188018410717 -2.69228640760197 1.95720836059593 -2.80768246809752 0.369114413868452 1.60433720239336 -1.12904146856823 +feature_8 2.04087850933637 -3.39761432466996 0.78752941468568 -3.4531969900781 -3.49624377726172 1.76272395685181 -3.13939004852551 2.79966721181393 1.97976571266901 -3.80633309732772 +feature_9 -3.41445914612496 2.18268228473137 -2.31074303702276 2.58059964125049 -2.56482429357585 -1.71273401701983 2.16230886925131 1.69099778494149 2.47397641001459 -3.13160239840691 +feature_10 -2.4801501229036 0.277265548866568 -2.72716618514931 1.6517716429499 -2.08301939352716 
-1.88620555973345 1.58528847585932 1.61929447444201 1.43353547610189 -2.6799919776754
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/html_outfile_1/html_outfile_1.html Mon Nov 19 10:15:19 2018 -0500 @@ -0,0 +1,21 @@ +<html> +<table id="r-gpca-wrap" align="center" border="1"> + <tr> + <th>Delta</th><th>P-value</th><th>varPCg1</th> + </tr> + <tr> + <td id=delta> 0.824447005030577 </td><td id=p.val> 0.512 </td><td id=varPCg1> 20.6295699072817 </td> + </tr> +</table> + +<center><img src="gDist.png"/></center><br> +<center><title>Guided 1v2</title><br><img src="guided_1v2.png"/></center><br> +<center><title>Unguided 1v2</title><br><img src="unguided_1v2.png"/></center><br> + +<center><title>Guided Compare to 3 </title><br><img src="guided_npcs.png"/></center><br> +<center><title>Unguided Compare to 3 </title><br><img src="unguided_npcs.png"/></center><br> + +<center><title>Guided Cumulative Variance</title><br><img src="guided_var.png"/></center><br> +<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var.png"/></center><br> + +</html>
