diff r-gpca.xml @ 0:ed24bacaa4d0 draft

planemo upload for repository https://github.com/blankenberglab/galaxy-tools-blankenberg/tree/master/tools/r-gpca commit ec2f585063522efa001e4d2a639e92a67758ad8c-dirty
author blankenberglab
date Mon, 19 Nov 2018 10:15:19 -0500
parents
children d93629e79f5e
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/r-gpca.xml	Mon Nov 19 10:15:19 2018 -0500
@@ -0,0 +1,108 @@
+<tool id="gpca" name="gPCA" version="1.0.0">
+  <description>Batch Effect Detection via Guided Principal Components Analysis</description>
+    <requirements>
+        <requirement type="package" version="1.0">r-gpca</requirement>
+        <requirement type="package" version="1.6.0">r-optparse</requirement>
+    </requirements>
+    <version_command><![CDATA['$__tool_directory__/r-gpca.R' --version]]></version_command>
+    <command detect_errors="exit_code">
+    <![CDATA[
+      
+      Rscript '$__tool_directory__/r-gpca.R'
+        -i '${batch}'
+        -d '${data}'
+        -n '${nperm}'
+        ${center}
+        ${scaleY}
+        #if str($filt):
+            -f '${filt}'
+        #end if
+        #if str($seed):
+            -s '${seed}'
+        #end if
+        --npcs '${npcs}'
+        -p '${html_outfile.extra_files_path}'
+        -x '${numbers_outfile}'
+        -o '${html_outfile}';
+
+    ]]></command>
+    
+    <inputs>
+        <param type="data" name="data"  argument="--data" format="tabular"   label="Data input file" />
+        <param type="data" name="batch" argument="--batch" format="tabular" label="Batch input file" />
+        <param type="integer" name="nperm" argument="--nperm" value="1000" help="default=1000" label="Number of permutations to preform"/>
+        <param type="boolean" name="center" argument="--center" checked="false" truevalue="--center" falsevalue="" help="Set to true to prevent gPCA from centering" label="Is the data centered?"/>
+        <param type="boolean" name="scaleY" argument="--scaleY" checked="false" truevalue="--scaleY" falsevalue="" help="default=No" label="Should Y be scaled based on number of samples in each batch?"/>
+        <param type="integer" name="filt" optional="true" argument="--filt"  min="1" value="" label="How many features to keep?" help="Leave blank for default(NULL)"/>
+        <param type="integer" name="seed" optional="true" argument="--seed"  min="1" value="" label="Seed #" help="Leave blank for default(NULL)"/>
+        <param type="integer" name="npcs" optional="false" argument="--npcs"  min="3" value="3" label="Principal Components to plot" help="Number of principal components to plot"/>
+    </inputs>
+
+    <outputs>
+        <data name="html_outfile" format="html" label="${tool.name} on ${on_string} html"/>
+        <data name="numbers_outfile" format="tabular" label="${tool.name} on ${on_string}: values" />
+    </outputs>
+
+    <tests>
+      <test>
+        <param name="batch" ftype="tabular" value="batch_in_1.tabular"  />
+        <param name="data" ftype="tabular" value="data_in_1.tabular"  />
+        <param name="nperm" value="1000" />
+        <param name="center" value="False" />
+        <param name="scaleY" value="False" />
+        <param name="filt" value="" />
+        <param name="npcs" value="3" />
+        <param name="seed" value="123456" />
+        <output name="html_outfile" ftype="html" file="html_outfile_1/html_outfile_1.html">
+          <extra_files type="file" name="gDist.png" value="html_outfile_1/gDist.png" compare="sim_size"/>
+          <extra_files type="file" name="guided_1v2.png" value="html_outfile_1/guided_1v2.png" compare="sim_size"/>
+          <extra_files type="file" name="unguided_1v2.png" value="html_outfile_1/unguided_1v2.png" compare="sim_size"/>
+          <extra_files type="file" name="guided_npcs.png" value="html_outfile_1/guided_npcs.png" compare="sim_size"/>
+          <extra_files type="file" name="unguided_npcs.png" value="html_outfile_1/unguided_npcs.png" compare="sim_size"/>
+          <extra_files type="file" name="guided_var.png" value="html_outfile_1/guided_var.png" compare="sim_size"/>
+          <extra_files type="file" name="unguided_var.png" value="html_outfile_1/unguided_var.png" compare="sim_size"/>
+        </output>
+        <output name="numbers_outfile" ftype="tabular" file="numbers_outfile_1.tabular"/>
+      </test>
+    </tests>
+    
+    <help>
+      <![CDATA[
+
+      **What it does**
+      Utilizes guided principal components analysis for the detection of batch effects in high-throughput data.
+
+      Arguments:
+
+      batch: a length n vector that indicates batch (group or class) for each observation.
+
+      x/data:  an nxp matrix of data where n denotes observations and p denotes the numberof features (e.g. probe, gene, SNP, etc.).
+
+      filt:  (optional) the number of features to retain after applying a variance filter. If NULL,  no  filter  is  applied. Filtering  can  significantly  reduce  the  processing time in the case of very large data sets.
+
+      nperm: the number of permutations to perform for the permutation test, default is 1000.
+
+      center:  (logical) Is your data x centered? If not, then center=FALSE and gPCA.batchdetect will center it for you.
+
+      scaleY:  (logical) Do you want to scale the Y matrix by the number of samples in each batch? If not, then center=FALSE (default), otherwise, center=TRUE.
+
+      seed:  the seed number for set.seed(). Default is NULL.
+
+      Literature:
+
+      https://cran.r-project.org/web/packages/gPCA/gPCA.pdf
+
+      https://cran.r-project.org/web/packages/gPCA/vignettes/gPCA.pdf
+      ]]>
+    </help>
+    
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/btt480</citation>
+    <citation type="bibtex">@unpublished{gpca:2018,
+      title  = "gPCA Galaxy Tool",
+      author = "Chris Lowe, Daniel Blankenberg",
+      url    = "https://github.com/BlankenbergLab/galaxy-tools-blankenberg/tree/master/tools/r-gpca",
+      year   = "2018 (accessed November 16, 2018)"}
+    </citation>
+  </citations>
+</tool>