annotate correlation_matrix.xml @ 3:89009e9b7eb0 draft

Uploaded
author sauria
date Thu, 27 Apr 2017 17:28:27 -0400
parents f0c8cdd78e28
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
1 <tool id="tableToCorrMatrix" name="Table to Correlation Matrix" version="1.0.0">
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
2 <command interpreter="python">correlation_matrix.py ${corr} ${feature} ${row} ${column} ${int} ${input} ${out_file1}</command>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
3 <requirements>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
4 <requirement type='package' version="1.12.1">numpy</requirement>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
5 <requirement type='package' version="0.19.0">scipy</requirement>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
6 <yield/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
7 </requirements>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
8 <inputs>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
9 <param format="tabular" name="input" type="data" label="Input Table" help="Dataset missing? See TIP below" />
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
10 <param name="feature" type="select" label="Features are in">
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
11 <option value="">Columns</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
12 <option value="-f">Rows</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
13 </param>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
14 <param name="corr" type="select" label="Correlation Statistic">
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
15 <option value="-t pearson">Pearson</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
16 <option value="-t spearman">Spearman</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
17 <option value="-t kendall">Kendall</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
18 </param>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
19 <param name="row" type="select" label="Row names">
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
20 <option value="-r">True</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
21 <option value="">False</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
22 </param>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
23 <param name="column" type="select" label="Column names">
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
24 <option value="-c">True</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
25 <option value="">False</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
26 </param>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
27 <param name="int" type="select" label="Datatype">
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
28 <option value="">Decimal</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
29 <option value="-i">Integer</option>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
30 </param>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
31 </inputs>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
32 <outputs>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
33 <data format="input" name="out_file1" metadata_source="input"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
34 </outputs>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
35 <tests>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
36 <test>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
37 <param name="input" value="corr_test_data.txt"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
38 <param name="feature" value=""/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
39 <param name="corr" value="pearson"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
40 <param name="row" value="-r"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
41 <param name="column" value="-c"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
42 <param name="int" value=""/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
43 <output name="out_file1" file="pearson.txt"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
44 </test>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
45 <test>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
46 <param name="input" value="corr_test_data.txt"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
47 <param name="feature" value=""/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
48 <param name="corr" value="spearman"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
49 <param name="row" value="-r"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
50 <param name="column" value="-c"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
51 <param name="int" value=""/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
52 <output name="out_file1" file="spearman.txt"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
53 </test>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
54 <test>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
55 <param name="input" value="corr_test_data.txt"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
56 <param name="feature" value=""/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
57 <param name="corr" value="kendall"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
58 <param name="row" value="-r"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
59 <param name="column" value="-c"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
60 <param name="int" value=""/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
61 <output name="out_file1" file="kendall.txt"/>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
62 </test>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
63 </tests>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
64 <help>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
65 .. class:: infomark
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
66
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
67 **TIP:** If your data is not TAB or SPACE delimited, use *Text Manipulation-&gt;Convert*
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
68
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
69 .. class:: warningmark
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
70
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
71 This tool will not work with missing data ("nan").
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
72
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
73 -----
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
74
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
75 **Syntax**
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
76
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
77 This tool computes the matrix of correlation coefficients between each pairwise combination of samples across all features.
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
78
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
79 - **Pearson's Correlation** reflects the degree of linear relationship between two variables. It ranges from +1 to -1. A correlation of +1 means that there is a perfect positive linear relationship between variables. The formula for Pearson's correlation is:
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
80
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
81 .. image:: pearson.png
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
82
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
83 where n is the number of items
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
84
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
85 - **Kendall's rank correlation** is used to measure the degree of correspondence between two rankings and to assess the significance of this correspondence. The formula for Kendall's rank correlation is:
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
86
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
87 .. image:: kendall.png
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
88
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
89 where n is the number of items, and P is the sum, over all items, of the number of items ranked after the given item by both rankings.
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
90
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
91 - **Spearman's rank correlation** assesses how well an arbitrary monotonic function could describe the relationship between two variables, without making any assumptions about the frequency distribution of the variables. The formula for Spearman's rank correlation is:
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
92
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
93 .. image:: spearman.png
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
94
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
95 where D is the difference between the ranks of corresponding values of X and Y, and N is the number of pairs of values.
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
96
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
97 -----
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
98
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
99 **Example**
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
100
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
101 - Input file::
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
102
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
103 Sample Feature1 Feature2 Feature3 Feature4 Feature5 Feature6
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
104 A -0.3019 1.6868 -2.5340 -4.9926 -2.1455 7.6550
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
105 B -0.5274 2.0816 -3.4161 -4.5812 -1.8705 6.9659
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
106 C -3.2619 0.0963 -1.0131 1.9299 -0.2277 -0.7781
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
107 D -1.9462 0.3685 -1.3830 1.1512 -0.1307 -0.3001
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
108 E -2.0360 -0.7261 -1.6089 1.0215 -0.1636 -1.6826
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
109 F -2.7152 0.8035 -1.7544 1.7193 -0.1782 -1.0745
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
110
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
111 - When the correlation coefficients are computed between every pair of rows of the above file (using Pearson's Correlation), the output is::
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
112
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
113 sample A B C D E F
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
114 A 1.0000 0.9923 -0.3144 -0.1313 -0.5773 -0.2793
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
115 B 0.9923 1.0000 -0.2575 -0.0462 -0.5011 -0.1915
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
116 C -0.3144 -0.2575 1.0000 0.9443 0.8916 0.9470
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
117 D -0.1313 -0.0462 0.9443 1.0000 0.8679 0.9793
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
118 E -0.5773 -0.5011 0.8916 0.8679 1.0000 0.9046
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
119 F -0.2793 -0.1915 0.9470 0.9793 0.9046 1.0000
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
120
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
121 </help>
f0c8cdd78e28 Uploaded
sauria
parents:
diff changeset
122 </tool>