annotate lda_analy.xml @ 1:423bcc3a3785 draft

Uploaded correct tarball.
author devteam
date Mon, 28 Jul 2014 11:41:28 -0400 (2014-07-28)
parents
children cf85ea165ce0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
1 <tool id="lda_analy1" name="Perform LDA" version="1.0.1">
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
2 <description>Linear Discriminant Analysis</description>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
3 <requirements>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
4 <requirement type="package" version="2.11.0">R</requirement>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
5 </requirements>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
6 <command interpreter="sh">r_wrapper.sh $script_file</command>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
7 <inputs>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
8 <param format="tabular" name="input" type="data" label="Source file"/>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
9 <param name="cond" size="30" type="integer" value="3" label="Number of principal components" help="See TIP below">
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
10 <validator type="empty_field" message="Enter a valid number of principal components, see syntax below for examples"/>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
11 </param>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
12
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
13 </inputs>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
14 <outputs>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
15 <data format="txt" name="output" />
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
16 </outputs>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
17
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
18 <tests>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
19 <test>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
20 <param name="input" value="matrix_generator_for_pc_and_lda_output.tabular"/>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
21 <output name="output" file="lda_analy_output.txt"/>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
22 <param name="cond" value="2"/>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
23
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
24 </test>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
25 </tests>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
26
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
27 <configfiles>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
28 <configfile name="script_file">
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
29
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
# Remove every object from the current workspace before the helpers below are defined.
30 rm(list = objects() )
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
31
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
32 ############# FORMAT X DATA #########################
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
format = function(data) {
  # Drop every column whose value in the last row is NA.
  #
  # data: a data frame or matrix.
  # Returns data without those columns. The result stays two-dimensional even
  # when only one column survives, and input with no rows or no columns is
  # returned unchanged.
  #
  # Note: this function shares its name with base::format and masks it for
  # calls made from the global workspace.
  # Written without the less-than, ampersand and dollar characters so it can
  # sit unescaped inside the tool's configfile.
  if (nrow(data) == 0 || ncol(data) == 0) {
    return(data)
  }
  last = nrow(data)
  missing_last = vapply(seq_len(ncol(data)),
                        function(j) is.na(data[last, j]),
                        logical(1))
  ind = which(missing_last)
  if (length(ind) > 0) {
    # drop = FALSE keeps a one-column result from collapsing to a vector
    data = data[, -ind, drop = FALSE]
  }
  data
}
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
47
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
48 ########GET RESPONSES ###############################
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
get_resp = function(data) {
  # Turn the class labels stored in the last column of data into 0/1 codes.
  #
  # data: a matrix or data frame whose last column holds the labels.
  # Returns a numeric vector with one entry per row: 1 where the label equals
  # "X " and 0 everywhere else (including "Y ", unknown labels and NA).
  # An input with no rows yields numeric(0).
  #
  # NOTE(review): both label literals carry a trailing blank, exactly as in the
  # original comparison; confirm this matches how the caller's matrix formats
  # its labels.
  labels = as.vector(data[, ncol(data)])
  resp = numeric(length(labels))
  # which() discards NA comparisons, so missing labels stay at 0
  resp[which(labels == "X ")] = 1
  return(resp)
}
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
62
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
63 ######## CHARS TO NUMBERS ###########################
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
f_to_numbers = function(F) {
  # Convert a matrix of integer-like values (for example character cells) into
  # a plain numeric matrix of the same dimensions.
  #
  # F: a matrix; each cell is passed through as.integer(), so fractional parts
  #    are truncated and unparsable cells become NA (with a warning).
  # Returns a double matrix with nrow(F) rows and ncol(F) columns and no
  # dimnames. Matrices with zero rows or columns are handled.
  #
  # The whole matrix is converted in one call instead of cell by cell.
  G = matrix(as.numeric(as.integer(F)), nrow(F), ncol(F))
  return(G)
}
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
74
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
75 ###################NORMALIZING#########################
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
norm = function(M, a=NULL, b=NULL) {
  # Standardize the columns of a numeric matrix.
  #
  # M: numeric matrix.
  # a, b: accepted for compatibility with existing calls; not used.
  # Returns M with every column that has a non-zero standard deviation
  # replaced by (column - mean) / sd. Columns whose standard deviation is zero
  # or cannot be computed (a single row, or NA values) are left untouched.
  #
  # Written without the less-than, ampersand and dollar characters so it can
  # sit unescaped inside the tool's configfile.
  for (j in seq_len(ncol(M))) {
    column = M[, j]
    spread = sd(column)
    # isTRUE() is FALSE for an NA standard deviation, so such columns are skipped
    if (isTRUE(spread != 0)) {
      M[, j] = (column - mean(column)) / spread
    }
  }
  return(M)
}
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
88
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
89 ##### LDA DIRECTIONS #################################
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
lda_dec = function(data, k){
  # Compute discriminant directions from labelled data.
  #
  # data: data frame whose first column holds the class label (the callers in
  #       this script name it "group") coded 1..k, and whose remaining columns
  #       are numeric predictors.
  # k:    number of classes.
  # Returns an unnamed list: [[1]] the matrix of directions (one per column),
  # [[2]] the singular values of the whitened between-class matrix.
  #
  # Written without the less-than, ampersand and dollar characters so it can
  # sit unescaped inside the tool's configfile.
  p = ncol(data) - 1
  # drop = FALSE keeps a single predictor as a one-column matrix
  features = as.matrix(data[, 2:ncol(data), drop = FALSE])
  labels = data[, 1]
  N = nrow(data)

  priors = numeric(k)
  means = matrix(0, k, p)
  grandmean = numeric(p)
  for (i in 1:k) {
    priors[i] = sum(labels == i) / N
    in_class = which(labels == i)
    # colMeans gives per-column means on every R version; mean() on a data
    # frame does not in current R
    means[i, ] = colMeans(features[in_class, , drop = FALSE])
    grandmean = priors[i] * means[i, ] + grandmean
  }

  # Between-class scatter: prior-weighted outer products of the centred class means
  B = matrix(0, p, p)
  for (i in 1:k) {
    offset = means[i, ] - grandmean
    B = B + priors[i] * (offset %*% t(offset))
  }

  # W is the covariance of all rows taken together, as returned by var()
  W = var(features)
  svdW = svd(W)
  root_values = sqrt(svdW[["d"]])
  # nrow is given explicitly: diag() of a single number would otherwise build
  # an identity matrix of that size instead of a 1 x 1 matrix
  sqrtW = svdW[["v"]] %*% diag(root_values, nrow = length(root_values)) %*% t(svdW[["v"]])
  inv_sqrtW = solve(sqrtW)
  B_star = t(inv_sqrtW) %*% B %*% inv_sqrtW
  B_star_decomp = svd(B_star)
  directions = inv_sqrtW %*% B_star_decomp[["v"]]
  return( list(directions, B_star_decomp[["d"]]) )
}
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
118
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
119 ################ NAIVE BAYES FOR 1D SIR OR LDA ##############
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
naive_bayes_classifier = function(resp, tr_data, test_data, k=2, tau) {
  # Classify one-dimensional scores into class 1 or 2 with a single cutoff.
  #
  # resp:      class labels (1 or 2) of the training scores.
  # tr_data:   training scores, a vector or one-column matrix.
  # test_data: scores to classify.
  # k:         number of classes; only the means of classes 1 and 2 enter the cutoff.
  # tau:       weight in [0, 1]; cutoff = tau * mean(class 1) + (1 - tau) * mean(class 2).
  # Returns a numeric vector with one predicted class per element of test_data
  # (numeric(0) for empty test_data).
  #
  # Scores at or below the cutoff are assigned to the class with the smaller
  # training mean. The side is chosen by comparing the two class means
  # directly, so it stays the same over the whole tau range, including tau = 1
  # where the cutoff coincides with the class-1 mean.
  #
  # Written without the less-than, ampersand and dollar characters so it can
  # sit unescaped inside the tool's configfile.
  training = data.frame(resp=resp, dir=tr_data)
  means = numeric(k)
  for (i in 1:k) {
    members = which(training[["resp"]] == i)
    means[i] = mean(training[["dir"]][members])
  }
  cutoff = tau*means[1] + (1-tau)*means[2]

  # cl[1] labels scores at or below the cutoff, cl[2] the scores above it
  if (means[2] > means[1]) {
    cl = c(1, 2)
  } else {
    cl = c(2, 1)
  }

  predclass = as.numeric(ifelse(cutoff >= as.vector(test_data), cl[1], cl[2]))
  predclass
}
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
159
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
160 ################# EXTENDED ERROR RATES #################
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
ext_error_rate = function(predclass, actualclass,msg=c("you forgot the message"), pr=1) {
  # Success rates (in percent) of a set of predictions.
  #
  # predclass:   predicted classes.
  # actualclass: true classes; 1 is treated as "escape", 2 as "subject".
  # msg, pr:     reporting options; the progress messages they controlled are
  #              disabled, so they do not affect the result.
  # Returns c(overall, within class 1, within class 2). A class with no
  # members yields NaN for its entry.
  percent_correct = function(pred, actual) {
    miss_fraction = sum(pred != actual) / length(pred)
    (1 - miss_fraction) * 100
  }

  overall = percent_correct(predclass, actualclass)

  matched = data.frame(predclass=predclass, actualclass=actualclass)
  guess = matched[["predclass"]]
  truth = matched[["actualclass"]]
  escapes = which(truth == 1)
  subjects = which(truth == 2)
  within_escapes = percent_correct(guess[escapes], truth[escapes])
  within_subjects = percent_correct(guess[subjects], truth[subjects])

  if (pr == 1) {
    # Reporting is disabled; the condition is still evaluated.
  }
  return(c(overall, within_escapes, within_subjects))
}
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
177
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
178 ## Main Function ##
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
179
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
# Leave-one-out cross-validation of the PCA + LDA classifier over a grid of
# cutoff weights; the resulting success rates are written with dput().
# Written without the less-than and ampersand characters so it can sit
# unescaped inside the tool's configfile.

# Values filled in by the tool wrapper before the script runs.
input_path = "${input}"
d = as.integer("${cond}")   # Number of PC

# Grid of weights used to slide the classification cutoff between the two
# class means (0 to 1 in steps of 0.005).
tau = seq(0, 1, by=0.005)
# One column per tau; rows hold the overall, class-1 and class-2 success
# rates. -10 is only an initial filler and is overwritten below.
for_curve = matrix(-10, 3, length(tau))

#### FORMAT INPUT DATA ####################################
whole = read.delim(input_path, row.names=1)
whole = format(whole)                             # script's own format(): drops columns whose last row is NA
whole = whole[, 1:(ncol(whole)-1), drop=FALSE]    # discard the last column
whole = t(whole)                                  # rows of the file become columns
ldaqda_resp = get_resp(whole) + 1                 # classes coded 1 and 2
# Predictors: every column except the last one, which holds the class label
X = f_to_numbers(whole[, 1:(ncol(whole)-1), drop=FALSE])

n = nrow(X)
if (is.na(d) || 1 > d) {
  stop("Number of principal components must be a positive integer")
}
# Each training fold has n - 1 rows, which bounds the number of available components
max_pc = min(n - 1, ncol(X))
if (d > max_pc) {
  stop(paste("Number of principal components must not exceed", max_pc))
}

############# LEAVE-ONE-OUT CROSS-VALIDATION #############
y = ldaqda_resp
cv = matrix(NA_real_, n, length(tau))
for (i in 1:n) {
  train_resp = y[-i]
  train = matrix(X[-i,], n-1, ncol(X))
  held_out = matrix(X[i,], 1, ncol(X))

  # Loadings come from the standardized training rows.
  # NOTE(review): scores are computed from the unstandardized rows, as in the
  # original script; confirm this is intended.
  loadings = svd(norm(train))[["v"]][, 1:d, drop=FALSE]   # drop=FALSE keeps d = 1 working
  train_pc = train %*% loadings
  test_pc = held_out %*% loadings

  lda = lda_dec(data.frame(group=train_resp, train_pc), 2)
  direction = lda[[1]][, 1]                       # first discriminant direction only
  train_score = train_pc %*% direction
  test_score = test_pc %*% direction

  for (t in 1:length(tau)) {
    cv[i, t] = naive_bayes_classifier(train_resp, train_score, test_score, k=2, tau[t])
  }
}

for (t in 1:length(tau)) {
  for_curve[1:3, t] = ext_error_rate(cv[, t], ldaqda_resp, c("CV"), 1)
}

dput(for_curve, file="${output}")
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
255
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
256
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
257 </configfile>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
258 </configfiles>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
259
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
260 <help>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
261
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
262 .. class:: infomark
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
263
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
264 **TIP:** If you want to perform Principal Component Analysis (PCA) on the given numeric input data (which corresponds to the "Source file First" in the "Generate A Matrix" tool), please use *Multivariate Analysis/Principal Component Analysis*.
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
265
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
266 -----
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
267
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
268 .. class:: infomark
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
269
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
270 **What it does**
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
271
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
272 This tool performs Linear Discriminant Analysis (LDA) as described in Carrel et al., 2006 (PMID: 17009873)
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
273
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
274 *Carrel L, Park C, Tyekucheva S, Dunn J, Chiaromonte F, et al. (2006) Genomic Environment Predicts Expression Patterns on the Human Inactive X Chromosome. PLoS Genet 2(9): e151. doi:10.1371/journal.pgen.0020151*
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
275
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
276 -----
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
277
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
278 .. class:: warningmark
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
279
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
280 **Note**
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
281
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
282 - The output of the "Generate A Matrix" tool is used as the input file for this tool.
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
283 - The output of this tool contains LDA classification success rates for different values of the tuning parameter tau (from 0 to 1 in steps of 0.005). This output file is used to draw the ROC plot, from which you can obtain more detailed information.
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
284
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
285
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
286 </help>
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
287
423bcc3a3785 Uploaded correct tarball.
devteam
parents:
diff changeset
288 </tool>