comparison correlation_matrix.xml @ 2:f0c8cdd78e28 draft

Uploaded
author sauria
date Thu, 27 Apr 2017 12:37:59 -0400
parents
children
comparison
equal deleted inserted replaced
1:9aeb70cf7a41 2:f0c8cdd78e28
1 <tool id="tableToCorrMatrix" name="Table to Correlation Matrix" version="1.0.0">
<!-- Command template. The script is addressed through $__tool_directory__ instead of the deprecated interpreter attribute. The select parameters are left unquoted on purpose: their values are whole flag fragments (for example "-t pearson") that must split into separate arguments, and an empty value must vanish. Dataset paths are quoted so unusual characters in a path cannot break the command line. -->
2 <command><![CDATA[python '$__tool_directory__/correlation_matrix.py' ${corr} ${feature} ${row} ${column} ${int} '${input}' '${out_file1}']]></command>
<!-- Packages Galaxy must resolve before the command runs. Only requirement children belong here; the macro placeholder element that used to sit in this list has no meaning outside a macro definition and was dropped. -->
3 <requirements>
4 <requirement type="package" version="1.12.1">numpy</requirement>
5 <requirement type="package" version="0.19.0">scipy</requirement>
7 </requirements>
<!-- Form parameters. Each select's option value is the literal text substituted for the matching ${name} placeholder in the command template, so an empty value means that no flag is passed for that choice. -->
8 <inputs>
<!-- The tabular dataset to analyse. -->
9 <param format="tabular" name="input" type="data" label="Input Table" help="Dataset missing? See TIP below" />
<!-- Table orientation: choosing "Rows" passes -f, choosing "Columns" passes nothing. -->
10 <param name="feature" type="select" label="Features are in">
11 <option value="">Columns</option>
12 <option value="-f">Rows</option>
13 </param>
<!-- Correlation statistic: the value carries both the -t flag and its argument. -->
14 <param name="corr" type="select" label="Correlation Statistic">
15 <option value="-t pearson">Pearson</option>
16 <option value="-t spearman">Spearman</option>
17 <option value="-t kendall">Kendall</option>
18 </param>
<!-- Row names: "True" passes -r, "False" passes nothing. -->
19 <param name="row" type="select" label="Row names">
20 <option value="-r">True</option>
21 <option value="">False</option>
22 </param>
<!-- Column names: "True" passes -c, "False" passes nothing. -->
23 <param name="column" type="select" label="Column names">
24 <option value="-c">True</option>
25 <option value="">False</option>
26 </param>
<!-- Datatype: "Integer" passes -i, "Decimal" passes nothing. -->
27 <param name="int" type="select" label="Datatype">
28 <option value="">Decimal</option>
29 <option value="-i">Integer</option>
30 </param>
31 </inputs>
32 <outputs>
<!-- Single result dataset written to the path passed as the last command argument. format_source is the explicit way to inherit the datatype of the "input" dataset; metadata is likewise copied from "input". -->
33 <data name="out_file1" format_source="input" metadata_source="input"/>
34 </outputs>
<!-- One regression test per correlation statistic, all on the same input table with row and column names enabled. Select parameters are set by their option value attribute, so "corr" uses the full "-t <statistic>" string exactly as declared on the input parameter. -->
35 <tests>
36 <test>
37 <param name="input" value="corr_test_data.txt"/>
38 <param name="feature" value=""/>
39 <param name="corr" value="-t pearson"/>
40 <param name="row" value="-r"/>
41 <param name="column" value="-c"/>
42 <param name="int" value=""/>
43 <output name="out_file1" file="pearson.txt"/>
44 </test>
45 <test>
46 <param name="input" value="corr_test_data.txt"/>
47 <param name="feature" value=""/>
48 <param name="corr" value="-t spearman"/>
49 <param name="row" value="-r"/>
50 <param name="column" value="-c"/>
51 <param name="int" value=""/>
52 <output name="out_file1" file="spearman.txt"/>
53 </test>
54 <test>
55 <param name="input" value="corr_test_data.txt"/>
56 <param name="feature" value=""/>
57 <param name="corr" value="-t kendall"/>
58 <param name="row" value="-r"/>
59 <param name="column" value="-c"/>
60 <param name="int" value=""/>
61 <output name="out_file1" file="kendall.txt"/>
62 </test>
63 </tests>
64 <help>
65 .. class:: infomark
66
67 **TIP:** If your data is not TAB or SPACE delimited, use *Text Manipulation-&gt;Convert*
68
69 .. class:: warningmark
70
71 This tool will not work with missing data ("nan").
72
73 -----
74
75 **Syntax**
76
77 This tool computes the matrix of correlation coefficients between each pairwise combination of samples across all features.
78
79 - **Pearson's Correlation** reflects the degree of linear relationship between two variables. It ranges from +1 to -1. A correlation of +1 means that there is a perfect positive linear relationship between variables. The formula for Pearson's correlation is:
80
81 .. image:: pearson.png
82
83 where n is the number of items
84
85 - **Kendall's rank correlation** is used to measure the degree of correspondence between two rankings and to assess the significance of this correspondence. The formula for Kendall's rank correlation is:
86
87 .. image:: kendall.png
88
89 where n is the number of items, and P is the sum, over all items, of the number of items ranked after the given item by both rankings.
90
91 - **Spearman's rank correlation** assesses how well an arbitrary monotonic function could describe the relationship between two variables, without making any assumptions about the frequency distribution of the variables. The formula for Spearman's rank correlation is:
92
93 .. image:: spearman.png
94
95 where D is the difference between the ranks of corresponding values of X and Y, and N is the number of pairs of values.
96
97 -----
98
99 **Example**
100
101 - Input file::
102
103 Sample Feature1 Feature2 Feature3 Feature4 Feature5 Feature6
104 A -0.3019 1.6868 -2.5340 -4.9926 -2.1455 7.6550
105 B -0.5274 2.0816 -3.4161 -4.5812 -1.8705 6.9659
106 C -3.2619 0.0963 -1.0131 1.9299 -0.2277 -0.7781
107 D -1.9462 0.3685 -1.3830 1.1512 -0.1307 -0.3001
108 E -2.0360 -0.7261 -1.6089 1.0215 -0.1636 -1.6826
109 F -2.7152 0.8035 -1.7544 1.7193 -0.1782 -1.0745
110
111 - Computing the correlation coefficients between every pair of rows of the above file (using Pearson's Correlation), the output is::
112
113 sample A B C D E F
114 A 1.0000 0.9923 -0.3144 -0.1313 -0.5773 -0.2793
115 B 0.9923 1.0000 -0.2575 -0.0462 -0.5011 -0.1915
116 C -0.3144 -0.2575 1.0000 0.9443 0.8916 0.9470
117 D -0.1313 -0.0462 0.9443 1.0000 0.8679 0.9793
118 E -0.5773 -0.5011 0.8916 0.8679 1.0000 0.9046
119 F -0.2793 -0.1915 0.9470 0.9793 0.9046 1.0000
120
121 </help>
122 </tool>