Mercurial > repos > blankenberglab > gpca
changeset 1:d93629e79f5e draft
planemo upload for repository https://github.com/blankenberglab/galaxy-tools-blankenberg/tree/master/tools/r-gpca commit 3b791d44853d8928046914cd93112bdcf89d4965
line wrap: on
line diff
--- a/r-gpca.R Mon Nov 19 10:15:19 2018 -0500 +++ b/r-gpca.R Tue Nov 27 11:02:46 2018 -0500 @@ -1,6 +1,7 @@ #!/usr/bin/env Rscript library(optparse) library(gPCA) +options(bitmapType='cairo') # parse options @@ -105,82 +106,92 @@ stop((paste('File for data', opt$data, 'does not exist'))) } -batch<-as.numeric(factor(as.matrix(read.delim(opt$batch, header=TRUE, row.names=1, sep="\t")))) - +dir.create(opt$path); data<-t(as.matrix(read.delim(opt$data, row.names=1, header=TRUE, sep="\t"))) +batch<-as.matrix(read.delim(opt$batch, header=TRUE, row.names=1, sep="\t")) -out<-gPCA.batchdetect(x=data, batch=batch, center=opt$center, scaleY=opt$scaleY, nperm=opt$nperm, filt=opt$filt, seed=opt$seed) +write('<html><body><a name="top"></a>',file = opt$html_outfile, append=TRUE) +write('#Batch\tDelta\tP-value\tCumulative Variance',file = opt$numbers_outfile, append=TRUE) +write('<div align="left><ul id =""><p><b>Table of Contents</b></p></ul></div>', file=opt$html_outfile, append=TRUE) + +for (row in 1:nrow(batch)) { + write(paste0('<li><a href="#',row.names(batch)[row],'">',row.names(batch)[row],'</a></li>'), file=opt$html_outfile, append=TRUE) +} +for (row in 1:nrow(batch)) { + batch1<-as.numeric(factor(batch[row,])) + +out<-gPCA.batchdetect(x=data, batch=batch1, center=opt$center, scaleY=opt$scaleY, nperm=opt$nperm, filt=opt$filt, seed=opt$seed) out$varPCg1<-((out$varPCg1-out$varPCu1)/out$varPCg1)*100 -dir.create(opt$path); - -write.table(data.frame(out$delta, out$p.val, out$varPCg1),file=opt$numbers_outfile, sep="\t", row.name=FALSE, quote=FALSE) +write(paste(row.names(batch)[row],out$delta,out$p.val,out$varPCg1,sep="\t"),file = opt$numbers_outfile, append=TRUE) # General Distribution par(mai=c(0.8,0.8,0.1,0.1),cex=0.8) -png(paste(opt$path,'/gDist.png', sep=""), width=1020, height=800, units='px') +png(paste(opt$path,'/gDist_',row,'.png', sep=""), width=1020, height=800, units='px') gDist(out) invisible(dev.off()) # Guided/Unguided 1v2 par(mai=c(0.8,0.8,0.1,0.1),cex=0.8) 
-png(paste(opt$path,'/guided_1v2.png', sep=""), width=1020, height=800, units='px') +png(paste(opt$path,'/guided_1v2_',row,'.png', sep=""), width=1020, height=800, units='px') PCplot(out,ug="unguided",type="1v2") invisible(dev.off()) par(mai=c(0.8,0.8,0.1,0.1),cex=0.8) -png(paste(opt$path,'/unguided_1v2.png', sep=""), width=1020, height=800, units='px') +png(paste(opt$path,'/unguided_1v2_',row,'.png', sep=""), width=1020, height=800, units='px') PCplot(out,ug="unguided",type="1v2") invisible(dev.off()) # Guided/Unguided comp,3 par(mai=c(0.65,0.65,0.1,0.1),cex=0.8) -png(paste(opt$path,'/guided_npcs.png', sep=""), width=1020, height=800, units='px') +png(paste(opt$path,'/guided_npcs_',row,'.png', sep=""), width=1020, height=800, units='px') PCplot(out,ug="guided",type="comp",npcs=opt$npcs) invisible(dev.off()) par(mai=c(0.65,0.65,0.1,0.1),cex=0.8) -png(paste(opt$path,'/unguided_npcs.png', sep=""), width=1020, height=800, units='px') +png(paste(opt$path,'/unguided_npcs_',row,'.png', sep=""), width=1020, height=800, units='px') PCplot(out,ug="unguided",type="comp",npcs=opt$npcs) invisible(dev.off()) # Guided/Unguided CumlativeVarPlot par(mai=c(0.8,0.8,0.1,0.1),cex=0.8) -png(paste(opt$path,'/guided_var.png', sep=""), width=1020, height=800, units='px') +png(paste(opt$path,'/guided_var_',row,'.png', sep=""), width=1020, height=800, units='px') CumulativeVarPlot(out,ug="guided",col="blue") invisible(dev.off()) par(mai=c(0.8,0.8,0.1,0.1),cex=0.8) -png(paste(opt$path,'/unguided_var.png', sep=""), width=1020, height=800, units='px') +png(paste(opt$path,'/unguided_var_',row,'.png', sep=""), width=1020, height=800, units='px') CumulativeVarPlot(out,ug="unguided",col="blue") invisible(dev.off()) -write(paste('<html> -<table id="r-gpca-wrap" align="center" border="1"> +write(paste0(' +<header id=',row.names(batch)[row],'><center><font size="11">',row.names(batch)[row],'</font><center></header> +<br> +<table align="center" border="1"> <tr> 
<th>Delta</th><th>P-value</th><th>varPCg1</th> </tr> <tr> - <td id=delta>',out$delta,'</td><td id=p.val>',out$p.val,'</td><td id=varPCg1>',out$varPCg1,'</td> + <td id="#">',out$delta,'</td><td id=p.val>',out$p.val,'</td><td id=varPCg1>',out$varPCg1,'</td> </tr> </table> +<center><img src="gDist_',row,'.png"/></center><br> +<center><title>Guided 1v2</title><br><img src="guided_1v2_',row,'.png"/></center><br> +<center><title>Unguided 1v2</title><br><img src="unguided_1v2_',row,'.png"/></center><br> -<center><img src="gDist.png"/></center><br> -<center><title>Guided 1v2</title><br><img src="guided_1v2.png"/></center><br> -<center><title>Unguided 1v2</title><br><img src="unguided_1v2.png"/></center><br> +<center><title>Guided Compare to ',opt$npcs,'</title><br><img src="guided_npcs_',row,'.png"/></center><br> +<center><title>Unguided Compare to ',opt$npcs,'</title><br><img src="unguided_npcs_',row,'.png"/></center><br> -<center><title>Guided Compare to ',opt$npcs,'</title><br><img src="guided_npcs.png"/></center><br> -<center><title>Unguided Compare to ',opt$npcs,'</title><br><img src="unguided_npcs.png"/></center><br> +<center><title>Guided Cumulative Variance</title><br><img src="guided_var_',row,'.png"/></center><br> +<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var_',row,'.png"/></center><br> -<center><title>Guided Cumulative Variance</title><br><img src="guided_var.png"/></center><br> -<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var.png"/></center><br> - -</html>' -),file = opt$html_outfile) +'), file = opt$html_outfile, append=TRUE) +} +write(paste('<p><center><a href="#top">Back to Top</a></center></p></body></html>'),file = opt$html_outfile, append=TRUE) sessionInfo()
--- a/r-gpca.xml Mon Nov 19 10:15:19 2018 -0500 +++ b/r-gpca.xml Tue Nov 27 11:02:46 2018 -0500 @@ -35,7 +35,7 @@ <param type="boolean" name="scaleY" argument="--scaleY" checked="false" truevalue="--scaleY" falsevalue="" help="default=No" label="Should Y be scaled based on number of samples in each batch?"/> <param type="integer" name="filt" optional="true" argument="--filt" min="1" value="" label="How many features to keep?" help="Leave blank for default(NULL)"/> <param type="integer" name="seed" optional="true" argument="--seed" min="1" value="" label="Seed #" help="Leave blank for default(NULL)"/> - <param type="integer" name="npcs" optional="false" argument="--npcs" min="3" value="3" label="Principal Components to plot" help="Number of principal components to plot"/> + <param type="integer" name="npcs" optional="false" argument="--npcs" min="3" value="3" label="Principal Components to calculate" help="Number of principal components to calculate"/> </inputs> <outputs> @@ -54,16 +54,43 @@ <param name="npcs" value="3" /> <param name="seed" value="123456" /> <output name="html_outfile" ftype="html" file="html_outfile_1/html_outfile_1.html"> - <extra_files type="file" name="gDist.png" value="html_outfile_1/gDist.png" compare="sim_size"/> - <extra_files type="file" name="guided_1v2.png" value="html_outfile_1/guided_1v2.png" compare="sim_size"/> - <extra_files type="file" name="unguided_1v2.png" value="html_outfile_1/unguided_1v2.png" compare="sim_size"/> - <extra_files type="file" name="guided_npcs.png" value="html_outfile_1/guided_npcs.png" compare="sim_size"/> - <extra_files type="file" name="unguided_npcs.png" value="html_outfile_1/unguided_npcs.png" compare="sim_size"/> - <extra_files type="file" name="guided_var.png" value="html_outfile_1/guided_var.png" compare="sim_size"/> - <extra_files type="file" name="unguided_var.png" value="html_outfile_1/unguided_var.png" compare="sim_size"/> + <extra_files type="file" name="gDist_1.png" 
value="html_outfile_1/gDist_1.png" compare="sim_size"/> + <extra_files type="file" name="guided_1v2_1.png" value="html_outfile_1/guided_1v2_1.png" compare="sim_size"/> + <extra_files type="file" name="unguided_1v2_1.png" value="html_outfile_1/unguided_1v2_1.png" compare="sim_size"/> + <extra_files type="file" name="guided_npcs_1.png" value="html_outfile_1/guided_npcs_1.png" compare="sim_size"/> + <extra_files type="file" name="unguided_npcs_1.png" value="html_outfile_1/unguided_npcs_1.png" compare="sim_size"/> + <extra_files type="file" name="guided_var_1.png" value="html_outfile_1/guided_var_1.png" compare="sim_size"/> + <extra_files type="file" name="unguided_var_1.png" value="html_outfile_1/unguided_var_1.png" compare="sim_size"/> </output> <output name="numbers_outfile" ftype="tabular" file="numbers_outfile_1.tabular"/> </test> + <test> + <param name="batch" ftype="tabular" value="batch_in_multiple_1.tabular" /> + <param name="data" ftype="tabular" value="data_in_1.tabular" /> + <param name="nperm" value="1000" /> + <param name="center" value="False" /> + <param name="scaleY" value="False" /> + <param name="filt" value="" /> + <param name="npcs" value="3" /> + <param name="seed" value="123456" /> + <output name="html_outfile" ftype="html" file="html_outfile_2/html_outfile_2.html"> + <extra_files type="file" name="gDist_1.png" value="html_outfile_2/gDist_1.png" compare="sim_size"/> + <extra_files type="file" name="gDist_2.png" value="html_outfile_2/gDist_2.png" compare="sim_size"/> + <extra_files type="file" name="guided_1v2_1.png" value="html_outfile_2/guided_1v2_1.png" compare="sim_size"/> + <extra_files type="file" name="guided_1v2_2.png" value="html_outfile_2/guided_1v2_2.png" compare="sim_size"/> + <extra_files type="file" name="unguided_1v2_1.png" value="html_outfile_2/unguided_1v2_1.png" compare="sim_size"/> + <extra_files type="file" name="unguided_1v2_2.png" value="html_outfile_2/unguided_1v2_2.png" compare="sim_size"/> + <extra_files type="file" 
name="guided_npcs_1.png" value="html_outfile_2/guided_npcs_1.png" compare="sim_size"/> + <extra_files type="file" name="guided_npcs_2.png" value="html_outfile_2/guided_npcs_2.png" compare="sim_size"/> + <extra_files type="file" name="unguided_npcs_1.png" value="html_outfile_2/unguided_npcs_1.png" compare="sim_size"/> + <extra_files type="file" name="unguided_npcs_2.png" value="html_outfile_2/unguided_npcs_2.png" compare="sim_size"/> + <extra_files type="file" name="guided_var_1.png" value="html_outfile_2/guided_var_1.png" compare="sim_size"/> + <extra_files type="file" name="guided_var_2.png" value="html_outfile_2/guided_var_2.png" compare="sim_size"/> + <extra_files type="file" name="unguided_var_1.png" value="html_outfile_2/unguided_var_1.png" compare="sim_size"/> + <extra_files type="file" name="unguided_var_2.png" value="html_outfile_2/unguided_var_2.png" compare="sim_size"/> + </output> + <output name="numbers_outfile" ftype="tabular" file="numbers_outfile_2.tabular"/> + </test> </tests> <help> @@ -88,11 +115,20 @@ seed: the seed number for set.seed(). Default is NULL. - Literature: + npcs: number of principal components to calculate + + **Literature** https://cran.r-project.org/web/packages/gPCA/gPCA.pdf https://cran.r-project.org/web/packages/gPCA/vignettes/gPCA.pdf + + **Input File Help** + + Batch: The batch file must be a tab delimited file. It will be read as a numeric vector, consisting of as many different batches as you like. The test data file batch_in_multiple_1.tabular shows the correct format. The length of batch must conform to the data matrix or an error will occur. + + Data: The data file must be a tab delimited file. It will be read in as a matrix. The test data file data_in_1.tabular shows the correct format. If you are receiving errors about non-conformable matrices you might need to transpose your matrix before using it in the tool. + ]]> </help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/batch_in_multiple_1.tabular Tue Nov 27 11:02:46 2018 -0500 @@ -0,0 +1,3 @@ +#batch sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 sample_7 sample_8 sample_9 sample_10 +time_period batch_1 batch_3 batch_3 batch_3 batch_2 batch_2 batch_1 batch_2 batch_3 batch_3 +random_batch b a c a a c b a c a \ No newline at end of file
--- a/test-data/html_outfile_1/html_outfile_1.html Mon Nov 19 10:15:19 2018 -0500 +++ b/test-data/html_outfile_1/html_outfile_1.html Tue Nov 27 11:02:46 2018 -0500 @@ -1,21 +1,26 @@ -<html> -<table id="r-gpca-wrap" align="center" border="1"> +<html><body><a name="top"></a> +<div align="left><ul id =""><p><b>Table of Contents</b></p></ul></div> +<li><a href="#time_period">time_period</a></li> + +<header id=time_period><center><font size="11">time_period</font><center></header> +<br> +<table align="center" border="1"> <tr> <th>Delta</th><th>P-value</th><th>varPCg1</th> </tr> <tr> - <td id=delta> 0.824447005030577 </td><td id=p.val> 0.512 </td><td id=varPCg1> 20.6295699072817 </td> + <td id="#">0.824447005030577</td><td id=p.val>0.512</td><td id=varPCg1>20.6295699072817</td> </tr> </table> - -<center><img src="gDist.png"/></center><br> -<center><title>Guided 1v2</title><br><img src="guided_1v2.png"/></center><br> -<center><title>Unguided 1v2</title><br><img src="unguided_1v2.png"/></center><br> +<center><img src="gDist_1.png"/></center><br> +<center><title>Guided 1v2</title><br><img src="guided_1v2_1.png"/></center><br> +<center><title>Unguided 1v2</title><br><img src="unguided_1v2_1.png"/></center><br> -<center><title>Guided Compare to 3 </title><br><img src="guided_npcs.png"/></center><br> -<center><title>Unguided Compare to 3 </title><br><img src="unguided_npcs.png"/></center><br> +<center><title>Guided Compare to 3</title><br><img src="guided_npcs_1.png"/></center><br> +<center><title>Unguided Compare to 3</title><br><img src="unguided_npcs_1.png"/></center><br> -<center><title>Guided Cumulative Variance</title><br><img src="guided_var.png"/></center><br> -<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var.png"/></center><br> +<center><title>Guided Cumulative Variance</title><br><img src="guided_var_1.png"/></center><br> +<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var_1.png"/></center><br> -</html> + 
+<p><center><a href="#top">Back to Top</a></center></p></body></html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/html_outfile_2/html_outfile_2.html Tue Nov 27 11:02:46 2018 -0500 @@ -0,0 +1,49 @@ +<html><body><a name="top"></a> +<div align="left><ul id =""><p><b>Table of Contents</b></p></ul></div> +<li><a href="#time_period">time_period</a></li> +<li><a href="#random_batch">random_batch</a></li> + +<header id=time_period><center><font size="11">time_period</font><center></header> +<br> +<table align="center" border="1"> + <tr> + <th>Delta</th><th>P-value</th><th>varPCg1</th> + </tr> + <tr> + <td id="#">0.824447005030577</td><td id=p.val>0.512</td><td id=varPCg1>20.6295699072817</td> + </tr> +</table> +<center><img src="gDist_1.png"/></center><br> +<center><title>Guided 1v2</title><br><img src="guided_1v2_1.png"/></center><br> +<center><title>Unguided 1v2</title><br><img src="unguided_1v2_1.png"/></center><br> + +<center><title>Guided Compare to 3</title><br><img src="guided_npcs_1.png"/></center><br> +<center><title>Unguided Compare to 3</title><br><img src="unguided_npcs_1.png"/></center><br> + +<center><title>Guided Cumulative Variance</title><br><img src="guided_var_1.png"/></center><br> +<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var_1.png"/></center><br> + + + +<header id=random_batch><center><font size="11">random_batch</font><center></header> +<br> +<table align="center" border="1"> + <tr> + <th>Delta</th><th>P-value</th><th>varPCg1</th> + </tr> + <tr> + <td id="#">0.97789000173787</td><td id=p.val>0.086</td><td id=varPCg1>19.260564791629</td> + </tr> +</table> +<center><img src="gDist_2.png"/></center><br> +<center><title>Guided 1v2</title><br><img src="guided_1v2_2.png"/></center><br> +<center><title>Unguided 1v2</title><br><img src="unguided_1v2_2.png"/></center><br> + +<center><title>Guided Compare to 3</title><br><img src="guided_npcs_2.png"/></center><br> +<center><title>Unguided Compare to 3</title><br><img src="unguided_npcs_2.png"/></center><br> + 
+<center><title>Guided Cumulative Variance</title><br><img src="guided_var_2.png"/></center><br> +<center><title>Unguided Cumulative Variance</title><br><img src="unguided_var_2.png"/></center><br> + + +<p><center><a href="#top">Back to Top</a></center></p></body></html>
--- a/test-data/numbers_outfile_1.tabular Mon Nov 19 10:15:19 2018 -0500 +++ b/test-data/numbers_outfile_1.tabular Tue Nov 27 11:02:46 2018 -0500 @@ -1,2 +1,2 @@ -out.delta out.p.val out.varPCg1 -0.824447005030577 0.512 20.6295699072817 +#Batch Delta P-value Cumulative Variance +time_period 0.824447005030577 0.512 20.6295699072817
