Mercurial > repos > devteam > plot_from_lda
comparison plot_from_lda.xml @ 0:542c4323ed83 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 28 Jul 2014 11:30:26 -0400 |
parents | |
children | d096b6d081e5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:542c4323ed83 |
---|---|
1 <tool id="plot_for_lda_output1" name="Draw ROC plot" version="1.0.1"> | |
2 <description>on "Perform LDA" output</description> | |
3 <requirements> | |
4 <requirement type="package" version="2.11.0">R</requirement> | |
5 </requirements> | |
6 | |
7 <command interpreter="sh">r_wrapper.sh $script_file</command> | |
8 | |
9 <inputs> | |
10 <param format="txt" name="input" type="data" label="Source file"> </param> | |
11 <param name="my_title" size="30" type="text" value="My Figure" label="Title of your plot" help="See syntax below"> </param> | |
12 <param name="X_axis" size="30" type="text" value="Text for X axis" label="Legend of X axis in your plot" help="See syntax below"> </param> | |
13 <param name="Y_axis" size="30" type="text" value="Text for Y axis" label="Legend of Y axis in your plot" help="See syntax below"> </param> | |
14 </inputs> | |
15 <outputs> | |
16 <data format="pdf" name="pdf_output" /> | |
17 </outputs> | |
18 | |
19 <tests> | |
20 <test> | |
21 <param name="input" value="lda_analy_output.txt"/> | |
22 <param name="my_title" value="Test Plot1"/> | |
23 <param name="X_axis" value="Test Plot2"/> | |
24 <param name="Y_axis" value="Test Plot3"/> | |
25 <output name="pdf_output" file="plot_for_lda_output.pdf"/> | |
26 </test> | |
27 </tests> | |
28 | |
29 <configfiles> | |
30 <configfile name="script_file"> | |
31 | |
32 rm(list = objects() ) | |
33 | |
34 ############# FORMAT X DATA ######################### | |
35 format<-function(data) { | |
36 ind=NULL | |
37 for(i in 1 : ncol(data)){ | |
38 if (is.na(data[nrow(data),i])) { | |
39 ind<-c(ind,i) | |
40 } | |
41 } | |
42 #print(is.null(ind)) | |
43 if (!is.null(ind)) { | |
44 data<-data[,-c(ind)] | |
45 } | |
46 | |
47 data | |
48 } | |
49 | |
50 ########GET RESPONSES ############################### | |
51 get_resp<- function(data) { | |
52 resp1<-as.vector(data[,ncol(data)]) | |
53 resp=numeric(length(resp1)) | |
54 for (i in 1:length(resp1)) { | |
55 if (resp1[i]=="Control ") { | |
56 resp[i] = 0 | |
57 } | |
58 if (resp1[i]=="XLMR ") { | |
59 resp[i] = 1 | |
60 } | |
61 } | |
62 return(resp) | |
63 } | |
64 | |
65 ######## CHARS TO NUMBERS ########################### | |
66 f_to_numbers<- function(F) { | |
67 ind<-NULL | |
68 G<-matrix(0,nrow(F), ncol(F)) | |
69 for (i in 1:nrow(F)) { | |
70 for (j in 1:ncol(F)) { | |
71 G[i,j]<-as.integer(F[i,j]) | |
72 } | |
73 } | |
74 return(G) | |
75 } | |
76 | |
77 ###################NORMALIZING######################### | |
78 norm <- function(M, a=NULL, b=NULL) { | |
79 C<-NULL | |
80 ind<-NULL | |
81 | |
82 for (i in 1: ncol(M)) { | |
83 if (sd(M[,i])!=0) { | |
84 M[,i]<-(M[,i]-mean(M[,i]))/sd(M[,i]) | |
85 } | |
86 # else {print(mean(M[,i]))} | |
87 } | |
88 return(M) | |
89 } | |
90 | |
91 ##### LDA DIRECTIONS ################################# | |
92 lda_dec <- function(data, k){ | |
93 priors=numeric(k) | |
94 grandmean<-numeric(ncol(data)-1) | |
95 means=matrix(0,k,ncol(data)-1) | |
96 B = matrix(0, ncol(data)-1, ncol(data)-1) | |
97 N=nrow(data) | |
98 for (i in 1:k){ | |
99 priors[i]=sum(data[,1]==i)/N | |
100 grp=subset(data,data\$group==i) | |
101 means[i,]=mean(grp[,2:ncol(data)]) | |
102 #print(means[i,]) | |
103 #print(priors[i]) | |
104 #print(priors[i]*means[i,]) | |
105 grandmean = priors[i]*means[i,] + grandmean | |
106 } | |
107 | |
108 for (i in 1:k) { | |
109 B= B + priors[i]*((means[i,]-grandmean)%*%t(means[i,]-grandmean)) | |
110 } | |
111 | |
112 W = var(data[,2:ncol(data)]) | |
113 svdW = svd(W) | |
114 inv_sqrtW =solve(svdW\$v %*% diag(sqrt(svdW\$d)) %*% t(svdW\$v)) | |
115 B_star= t(inv_sqrtW)%*%B%*%inv_sqrtW | |
116 B_star_decomp = svd(B_star) | |
117 directions = inv_sqrtW%*%B_star_decomp\$v | |
118 return( list(directions, B_star_decomp\$d) ) | |
119 } | |
120 | |
121 ################ NAIVE BAYES FOR 1D SIR OR LDA ############## | |
122 naive_bayes_classifier <- function(resp, tr_data, test_data, k=2, tau) { | |
123 tr_data=data.frame(resp=resp, dir=tr_data) | |
124 means=numeric(k) | |
125 #print(k) | |
126 cl=numeric(k) | |
127 predclass=numeric(length(test_data)) | |
128 for (i in 1:k) { | |
129 grp = subset(tr_data, resp==i) | |
130 means[i] = mean(grp\$dir) | |
131 #print(i, means[i]) | |
132 } | |
133 cutoff = tau*means[1]+(1-tau)*means[2] | |
134 #print(tau) | |
135 #print(means) | |
136 #print(cutoff) | |
137 if (cutoff>means[1]) { | |
138 cl[1]=1 | |
139 cl[2]=2 | |
140 } | |
141 else { | |
142 cl[1]=2 | |
143 cl[2]=1 | |
144 } | |
145 | |
146 for (i in 1:length(test_data)) { | |
147 | |
148 if (test_data[i] <= cutoff) { | |
149 predclass[i] = cl[1] | |
150 } | |
151 else { | |
152 predclass[i] = cl[2] | |
153 } | |
154 } | |
155 #print(means) | |
156 #print(mean(means)) | |
157 #X11() | |
158 #plot(test_data,pch=predclass, col=resp) | |
159 predclass | |
160 } | |
161 | |
162 ################# EXTENDED ERROR RATES ################# | |
163 ext_error_rate <- function(predclass, actualclass,msg=c("you forgot the message"), pr=1) { | |
164 er=sum(predclass != actualclass)/length(predclass) | |
165 | |
166 matr<-data.frame(predclass=predclass,actualclass=actualclass) | |
167 escapes = subset(matr, actualclass==1) | |
168 subjects = subset(matr, actualclass==2) | |
169 er_esc=sum(escapes\$predclass != escapes\$actualclass)/length(escapes\$predclass) | |
170 er_subj=sum(subjects\$predclass != subjects\$actualclass)/length(subjects\$predclass) | |
171 | |
172 if (pr==1) { | |
173 # print(paste(c(msg, 'overall : ', (1-er)*100, "%."),collapse=" ")) | |
174 # print(paste(c(msg, 'within escapes : ', (1-er_esc)*100, "%."),collapse=" ")) | |
175 # print(paste(c(msg, 'within subjects: ', (1-er_subj)*100, "%."),collapse=" ")) | |
176 } | |
177 return(c((1-er)*100, (1-er_esc)*100, (1-er_subj)*100)) | |
178 } | |
179 | |
180 ## Main Function ## | |
181 | |
182 files_alias<-c("${my_title}") | |
183 tau=seq(0,1,by=0.005) | |
184 nfiles=1 | |
185 f = c("${input}") | |
186 | |
187 rez_ext<-list() | |
188 for (i in 1:nfiles) { | |
189 rez_ext[[i]]<-dget(paste(f[i], sep="",collapse="")) | |
190 } | |
191 | |
192 tau<-tau[1:(length(tau)-1)] | |
193 for (i in 1:nfiles) { | |
194 rez_ext[[i]]<-rez_ext[[i]][,1:(length(tau)-1)] | |
195 } | |
196 | |
197 ######## OPTIMAIL TAU ########################### | |
198 | |
199 #rez_ext | |
200 | |
201 rate<-c("Optimal tau","Tr total", "Tr Y", "Tr X") | |
202 | |
203 m_tr<-numeric(nfiles) | |
204 m_xp22<-numeric(nfiles) | |
205 m_x<-numeric(nfiles) | |
206 | |
207 for (i in 1:nfiles) { | |
208 r<-rez_ext[[i]] | |
209 #tr | |
210 # rate<-rbind(rate, c(files_alias[i]," "," "," ") ) | |
211 mm<-which((r[3,])==max(r[3,])) | |
212 | |
213 m_tr[i]<-mm[1] | |
214 rate<-rbind(rate,c(tau[m_tr[i]],r[,m_tr[i]])) | |
215 } | |
216 print(rate) | |
217 | |
218 pdf(file= paste("${pdf_output}")) | |
219 | |
220 plot(rez_ext[[i]][2,]~rez_ext[[i]][3,], xlim=c(0,100), ylim=c(0,100), xlab="${X_axis} [1-FP(False Positive)]", ylab="${Y_axis} [1-FP(False Positive)]", type="l", lty=1, col="blue", xaxt='n', yaxt='n') | |
221 for (i in 1:nfiles) { | |
222 lines(rez_ext[[i]][2,]~rez_ext[[i]][3,], xlab="${X_axis} [1-FP(False Positive)]", ylab="${Y_axis} [1-FP(False Positive)]", type="l", lty=1, col=i) | |
223 # pt=c(r,) | |
224 points(x=rez_ext[[i]][3,m_tr[i]],y=rez_ext[[i]][2,m_tr[i]], pch=16, col=i) | |
225 } | |
226 | |
227 | |
228 title(main="${my_title}", adj=0, cex.main=1.1) | |
229 axis(2, at=c(0,20,40,60,80,100), labels=c('0','20','40','60','80','100%')) | |
230 axis(1, at=c(0,20,40,60,80,100), labels=c('0','20','40','60','80','100%')) | |
231 | |
232 #leg=c("10 kb","50 kb","100 kb") | |
233 #legend("bottomleft",legend=leg , col=c(1,2,3), lty=c(1,1,1)) | |
234 | |
235 #dev.off() | |
236 | |
237 </configfile> | |
238 </configfiles> | |
239 | |
240 | |
241 <help> | |
242 .. class:: infomark | |
243 | |
244 **What it does** | |
245 | |
246 This tool generates a Receiver Operating Characteristic (ROC) plot that shows LDA classification success rates for different values of the tuning parameter tau as Figure 3 in Carrel et al., 2006 (PMID: 17009873). | |
247 | |
248 *Carrel L, Park C, Tyekucheva S, Dunn J, Chiaromonte F, et al. (2006) Genomic Environment Predicts Expression Patterns on the Human Inactive X Chromosome. PLoS Genet 2(9): e151. doi:10.1371/journal.pgen.0020151* | |
249 | |
250 ----- | |
251 | |
252 .. class:: warningmark | |
253 | |
254 **Note** | |
255 | |
256 - Output from "Perform LDA" tool is used as input file for this tool. | |
257 | |
258 </help> | |
259 | |
260 | |
261 | |
262 </tool> |