annotate pca.xml @ 82:057169fb41c7 draft

Uploaded
author bernhardlutz
date Mon, 20 Jan 2014 15:08:40 -0500
parents c4a3a8999945
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
80
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
1 <tool id="pca1" name="Principal Component Analysis" version="1.1.0">
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
2 <description> </description>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
3 <expand macro="requirements" />
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
4 <macros>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
5 <import>statistic_tools_macros.xml</import>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
6 </macros>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
7 <command interpreter="python">
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
8 pca.py
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
9 $input1
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
10 $var_cols
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
11 $methodChoice.method
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
12 $out_file1
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
13 $out_file2
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
14 #if $methodChoice.method == "svd":
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
15 $methodChoice.scale
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
16 #end if
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
17 </command>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
18 <inputs>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
19 <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
20 <param name="var_cols" label="Select columns containing input variables " type="data_column" data_ref="input1" numerical="True" multiple="true" >
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
21 <validator type="no_options" message="Please select at least one column."/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
22 </param>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
23 <conditional name="methodChoice">
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
24 <param name="method" type="select" label="Method" help="The correlation matrix can only be used if there are no constant variables">
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
25 <option value="cor" selected="true">Eigenvectors of Correlation (princomp)</option>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
26 <option value="cov">Eigenvectors of Covariance (princomp)</option>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
27 <option value="svd">Singular Value Decomposition (prcomp)</option>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
28 </param>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
29 <when value="cor" />
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
30 <when value="cov" />
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
31 <when value="svd">
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
32 <param name="scale" type="select" label="Centering and Scaling" help="Can be used to center and/or scale variables">
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
33 <option value="none" selected="true">None</option>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
34 <option value="center">Center only</option>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
35 <option value="scale">Scale only</option>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
36 <option value="both">Center and Scale</option>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
37 </param>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
38 </when>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
39 </conditional>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
40 </inputs>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
41 <outputs>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
42 <data format="input" name="out_file1" metadata_source="input1" />
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
43 <data format="pdf" name="out_file2" />
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
44 </outputs>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
45 <tests>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
46 <test>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
47 <param name="input1" value="iris.tabular"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
48 <param name="var_cols" value="1,2,3,4"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
49 <param name="method" value="cor"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
50 <output name="out_file1" file="pca_out1.tabular"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
51 <output name="out_file2" file="pca_out2.pdf"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
52 </test>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
53 <test>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
54 <param name="input1" value="iris.tabular"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
55 <param name="var_cols" value="1,2,3,4"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
56 <param name="method" value="cov"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
57 <output name="out_file1" file="pca_out3.tabular"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
58 <output name="out_file2" file="pca_out4.pdf"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
59 </test>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
60 <test>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
61 <param name="input1" value="iris.tabular"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
62 <param name="var_cols" value="1,2,3,4"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
63 <param name="method" value="svd"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
64 <param name="scale" value="both"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
65 <output name="out_file1" file="pca_out5.tabular"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
66 <output name="out_file2" file="pca_out6.pdf"/>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
67 </test>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
68 </tests>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
69 <help>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
70
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
71
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
72 .. class:: infomark
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
73
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
74 **TIP:** If your data is not TAB delimited, use *Edit Datasets-&gt;Convert characters*
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
75
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
76 -----
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
77
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
78 .. class:: infomark
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
79
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
80 **What it does**
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
81
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
82 This tool performs Principal Component Analysis on the given numeric input data using functions from the R statistical package: the 'princomp' function (for the eigenvector-based solution) and the 'prcomp' function (for the singular value decomposition-based solution). It outputs two files: one containing the summary statistics of the PCA, and the other containing biplots of the observations and principal components.
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
83
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
84 *R Development Core Team (2009). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0, URL http://www.R-project.org.*
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
85
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
86 -----
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
87
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
88 .. class:: warningmark
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
89
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
90 **Note**
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
91
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
92 - This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables might produce incorrect results. Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
93
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
94 - The summary statistics in the output are described below:
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
95
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
96 - Std. deviation: Standard deviations of the principal components
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
97 - Loadings: A list of eigenvectors/variable loadings
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
98 - Scores: Scores of the input data on the principal components
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
99
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
100 </help>
c4a3a8999945 Uploaded
bernhardlutz
parents:
diff changeset
101 </tool>