comparison ST Kinatest/test-data/ST_Kinatest.R @ 0:b0493d923404 draft

Uploaded
author jfb
date Wed, 27 Jun 2018 11:53:32 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b0493d923404
# ---- Input tables ----------------------------------------------------------
# "input1" and "input3" are read without a header row, so their first line is
# kept as data; "input2" uses read.csv's default and takes its first line as
# the header.
ImportedSubstrateList <- read.csv("input1", header = FALSE, stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv("input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv("input3", header = FALSE, stringsAsFactors = FALSE)

ScreenerFilename <- "screenerST.csv"

# ---- Output file names -----------------------------------------------------
FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"
FILENAME4 <- "output4"
FILENAME5 <- "output5"

# ---- Background frequency table --------------------------------------------
# t() turns the data frame into a matrix with the file's columns as rows.
# The first row of the transposed table is dropped, then every row that
# contains an NA is removed.
SubstrateBackgroundFrequency <- t(SubstrateBackgroundFrequency)
# number<-nrow(SubstrateBackgroundFrequency)-1
Sub <- na.omit(SubstrateBackgroundFrequency[2:nrow(SubstrateBackgroundFrequency), ])
SubstrateBackgroundFrequency <- Sub

# ---- Run configuration -----------------------------------------------------
# The command-line arguments are captured but the kinase name is hard-coded.
args <- commandArgs(trailingOnly = TRUE)
TodaysKinase <- "AKT"

#as.character(args[1])

OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
# NOTE(review): SDtable is reassigned to a numeric matrix further down.
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
36
37
38
# Build `substrates`: a character matrix with one row per imported substrate
# and 15 columns, filled from columns 4 to 18 of ImportedSubstrateList.
# Row 1 of ImportedSubstrateList is skipped (the table was read with
# header = FALSE, so presumably it holds the column titles - confirm).
SubstrateCount <- max(nrow(ImportedSubstrateList) - 1, 0)
substrates <- matrix(rep("A", times = SubstrateCount * 15), ncol = 15)
#SeqsToBeScored<-"asdasd"

# seq_len() gives an empty sequence when there are no data rows; the previous
# `2:nrow(...)` counted down (2, 1) in that case and failed on an
# out-of-bounds write into the empty matrix.
for (j in seq_len(SubstrateCount)) {
  i <- j + 1
  substratemotif <- ImportedSubstrateList[i, 4:18]
  # The 8th motif cell is split into single characters and only the second
  # character is kept (NA when the cell has fewer than two characters).
  T_or_S <- unlist(strsplit(as.character(substratemotif[8]), split = ""))
  substratemotif[8] <- T_or_S[2]
  #substratemotif<-paste(substratemotif,sep = "",collapse = "")
  substratemotif <- unlist(substratemotif)
  substrates[j, 1:15] <- substratemotif
}
52
53 # SpacesToOs<-c(""="O",)
54 # substrates<-SpacesToOs[substrates]
55
56 #SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency),2]
57
# Mean and standard deviation of columns 2 to 21 of
# SubstrateBackgroundFrequency, one column per amino-acid letter below.
# NOTE(review): the letter-to-column mapping is taken from the variable names
# used by the original code (Amean = column 2, ..., Ymean = column 21);
# confirm it against the layout of "input3".
BackgroundAAs <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                   "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

AllMeans <- numeric(length(BackgroundAAs))
AllSDs <- numeric(length(BackgroundAAs))

for (aa in seq_along(BackgroundAAs)) {
  # The table is a character matrix at this point, hence as.numeric();
  # values that cannot be parsed become NA and are ignored by na.rm = TRUE.
  BackgroundValues <- as.numeric(SubstrateBackgroundFrequency[, aa + 1])
  AllMeans[aa] <- mean(BackgroundValues, na.rm = TRUE)
  AllSDs[aa] <- sd(BackgroundValues, na.rm = TRUE)

  # Keep the per-letter scalars (Amean, Asd, ..., Ymean, Ysd): the SD-table
  # section refers to them by name.
  assign(paste0(BackgroundAAs[aa], "mean"), AllMeans[aa])
  assign(paste0(BackgroundAAs[aa], "sd"), AllSDs[aa])
}
#this is subbackfreq SDs

# Two-column matrix (AllMeans, AllSDs) with one row per amino acid.
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
107
108 #create the percent table
# Percent table: for each of the 15 columns of `substrates`, the share of
# non-blank cells occupied by each amino acid, expressed as a percentage.
# Rows follow PercentTableAAs; columns are the 15 motif positions.
#
# The per-cell scalars (A1 ... Y15), the ColumnN / spacesN helpers and the
# per-letter row blocks (AllAs, CllCs, ...) are not materialised here; the
# count-table section further down assigns those names afresh.
# NOTE(review): confirm nothing else reads the percentage versions.
PercentTableAAs <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# Column names A1 ... A15 are the ones the table used to inherit from its
# first row block; they are kept because PercentTable is later passed to
# data.frame(), where they become column names.
PercentTable <- matrix(0,
                       nrow = length(PercentTableAAs),
                       ncol = 15,
                       dimnames = list(NULL, paste0("A", 1:15)))

for (position in 1:15) {
  PositionColumn <- substrates[, position]
  # Blank cells ("") are left out of the denominator. A position with no
  # non-blank cell yields 0/0 = NaN, as before.
  OccupiedCells <- length(PositionColumn) - sum(PositionColumn %in% "")
  for (aa in seq_along(PercentTableAAs)) {
    PercentTable[aa, position] <-
      sum(PositionColumn %in% PercentTableAAs[aa]) / OccupiedCells
  }
}
#this is substrate percents

#A C D E F G H I K L M N P Q R S T V W Y

PercentTable <- PercentTable * 100
504
505 #create the SD table
# SD table: every percentage minus the background mean of its amino acid,
# divided by the background SD of that amino acid.
# AllMeans / AllSDs have one entry per row of PercentTable, so R's
# column-major recycling pairs row k with AllMeans[k] and AllSDs[k].
stopifnot(nrow(PercentTable) == length(AllMeans),
          nrow(PercentTable) == length(AllSDs))
# unname() drops the column names inherited from PercentTable so the result
# is a plain matrix without dimnames, as it was when filled row by row.
SDtable <- unname((PercentTable - AllMeans) / AllSDs)
530
531
# Row labels for the output tables; element k + 1 is the amino acid of row k
# of SDtable (element 1 is the header label).
SetOfAAs <- c("Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# For each position: how many substrates carry an amino acid whose SDtable
# entry at that position is above 2.
SumOfSigmaAAs <- numeric(15)

for (i in 1:15) {
  # which() skips NA/NaN entries (e.g. an all-blank position or an undefined
  # background SD); a bare `if (SDtable[j, i] > 2)` stops with an error there.
  EnrichedRows <- which(SDtable[, i] > 2)
  # %in% matches the blank/letter tests used for the percent table and does
  # not propagate NA cells into the count the way `==` does.
  SumOfSigmaAAs[i] <- sum(substrates[, i] %in% SetOfAAs[EnrichedRows + 1])
}
548
# Number of non-blank cells in each of the 15 columns of `substrates`.
# Blank cells are detected with %in%, the same test the expected-count loop
# below and the percent table use, so an NA cell no longer turns the whole
# position count into NA.
# NOTE(review): the intermediate scalars AAs1 ... AAs15 are no longer
# created; confirm nothing else reads them.
AAsAtPositions <- vapply(
  seq_len(15),
  function(position) sum(!(substrates[, position] %in% "")),
  integer(1)
)
566 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
567 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
568 # length(substrates[,9]))
569
# For each position: the summed background means (percentages) of the amino
# acids whose SDtable entry is above 2, scaled by the number of non-blank
# cells at that position and divided by 100.
SumOfExpectedSigmaAAs <- numeric(15)

for (i in 1:15) {
  # which() skips NA/NaN entries; a bare `if (SDtable[j, i] > 2)` stops with
  # an error on them.
  EnrichedRows <- which(SDtable[, i] > 2)
  ExpectedValue <- sum(AllMeans[EnrichedRows])
  OccupiedCells <- length(substrates[, i]) - sum(substrates[, i] %in% "")
  SumOfExpectedSigmaAAs[i] <- ExpectedValue * OccupiedCells / 100
}
582
583 #SelectivityRow<-SumOfSigmaAAs/SumOfExpectedSigmaAAs
584
# Per-position selectivity: (observed / expected) * (observed / non-blank
# cells at that position), computed element-wise over the 15 positions.
ObservedOverExpected <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
ObservedFraction <- SumOfSigmaAAs / AAsAtPositions
SelectivityRow <- ObservedOverExpected * ObservedFraction

# Three-row sheet: observed counts, expected counts, selectivity.
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)
592
# ---- Labelled output tables ------------------------------------------------
# SetOfAAs becomes a one-column matrix so it can serve as the first column of
# both data frames below.
SetOfAAs <- matrix(data = SetOfAAs, ncol = 1)

# Header row: position labels -7 ... 7.
HeaderSD <- c(-7:7)

# SDtableu keeps the SD matrix without the header row.
SDtableu <- SDtable
SDtable <- rbind(HeaderSD, SDtableu)
row.names(SDtable) <- NULL
SDtable <- data.frame(SetOfAAs, SDtable)

PercentTable <- rbind(HeaderSD, PercentTable)
row.names(PercentTable) <- NULL
PercentTable <- data.frame(SetOfAAs, PercentTable)

# ---- Normalization score ---------------------------------------------------
# Columns 34 and 35 of the background table are converted to numbers and
# entries that do not parse (NA) are discarded before summing.
# NOTE(review): the variable names suggest these are counts of Y and
# phospho-Y - confirm against the layout of "input3".
numberofY <- as.numeric(SubstrateBackgroundFrequency[, 34])
numberofY <- numberofY[!is.na(numberofY)]

numberofPY <- as.numeric(SubstrateBackgroundFrequency[, 35])
numberofPY <- numberofPY[!is.na(numberofPY)]

NormalizationScore <- sum(numberofPY) / sum(numberofY)
610
611 # positions<-matrix(data = NA, nrow=20,ncol = 15)
612 #
613 # #column1
614 #
615 # for (q in 1:15) {
616 # sA<-sum(substrates[,i]=="A")
617 # positions[1,i]<-sA
618 # sC<-sum(substrates[,i]=="C")
619 # positions[2,i]<-sC
620 # sD<-sum(substrates[,i]=="D")
621 # positions[3,i]<-sD
622 # sE<-sum(substrates[,i]=="E")
623 # positions[4,i]<-sE
624 # sF<-sum(substrates[,i]=="F")
625 # sG<-sum(substrates[,i]=="G")
626 # sH<-sum(substrates[,i]=="H")
627 # sI<-sum(substrates[,i]=="I")
628 # sK<-sum(substrates[,i]=="K")
629 # sL<-sum(substrates[,i]=="L")
630 # sM<-sum(substrates[,i]=="M")
631 # sN<-sum(substrates[,i]=="N")
632 # sP<-sum(substrates[,i]=="P")
633 # sQ<-sum(substrates[,i]=="Q")
634 # sR<-sum(substrates[,i]=="R")
635 # sS<-sum(substrates[,i]=="S")
636 # sT<-sum(substrates[,i]=="T")
637 # sV<-sum(substrates[,i]=="V")
638 # sW<-sum(substrates[,i]=="W")
639 # sY<-sum(substrates[,i]=="Y")
640 # positions[5,i]<-sF
641 # positions[6,i]<-sG
642 # positions[7,i]<-sH
643 # positions[8,i]<-sI
644 # positions[9,i]<-sK
645 # positions[10,i]<-sL
646 # positions[11,i]<-sM
647 # positions[12,i]<-sN
648 # positions[13,i]<-sP
649 # positions[14,i]<-sQ
650 # positions[15,i]<-sR
651 # positions[16,i]<-sS
652 # positions[17,i]<-sT
653 # positions[18,i]<-sV
654 # positions[19,i]<-sW
655 # positions[20,i]<-sY
656 # }
657
#here I create the positions table, which is needed for the endogenous probability matrix and simply answers "how many times did each AA show up in this spot?"
# Count how often each of the 20 amino acids occurs at each of the 15 motif
# positions of `substrates`.  The condition is always TRUE; the braces only
# group the section.
#
# Besides PositionTable (20 residues x 15 positions) this defines, as before:
#   Column1 ... Column15   the residues found at each position
#   spaces1 ... spaces15   number of blank ("") entries per position
#   A1 ... Y15             one count per residue/position pair
#   AllAs, CllCs, ... YllYs  1 x 15 count rows, one per residue
# The individual variables are created with assign() so that any code that
# refers to them by name keeps working.
if (6 == 6) {
  # Residue letters in the row order of PositionTable.
  residue_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                       "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

  # Per-position residue vectors and blank counts.
  for (site in 1:15) {
    site_residues <- substrates[, site]
    assign(paste0("Column", site), site_residues)
    # %in% (rather than ==) so that an NA entry counts as "not blank".
    assign(paste0("spaces", site), sum(site_residues %in% ""))
  }

  # Per-residue counts: one scalar per position plus the 1 x 15 row matrix.
  residue_rows <- vector("list", length(residue_letters))
  for (residue_index in seq_along(residue_letters)) {
    residue <- residue_letters[residue_index]
    cell_names <- paste0(residue, 1:15)

    site_counts <- vapply(
      1:15,
      function(site) sum(substrates[, site] %in% residue),
      integer(1)
    )
    for (site in 1:15) {
      assign(cell_names[site], site_counts[site])
    }

    # Same shape as cbind(A1, A2, ..., A15): one row, columns named A1..A15.
    count_row <- matrix(site_counts, nrow = 1, dimnames = list(NULL, cell_names))
    assign(paste0(residue, "ll", residue, "s"), count_row)
    residue_rows[[residue_index]] <- count_row
  }

  # Stack the 20 rows in residue order; column names come from the first row.
  PositionTable <- do.call(rbind, residue_rows)
}
1033 #endogenous prob matrix is AA position over subbackfreqmean
# Endogenous probability matrix (EPM).
#
# For residue y at position z:
#   EPM[y, z] = n / (columns[z] * .01 * AllMeans[y])
# where n is the observed count PositionTable[y, z], or the pseudo-count
# 1 / columns[z] when the residue was never observed at that position, and
# columns[z] is the number of non-blank residues at position z.
# NOTE(review): AllMeans is defined earlier in the file; the .01 factor
# suggests it holds per-residue background frequencies in percent -- confirm.
EPMtable <- PositionTable

# Non-blank residues per position.  Blanks are counted with %in% so that an
# NA entry in a column cannot turn the whole count (and with it an entire
# EPM column) into NA, matching how blanks are counted for spaces1..spaces15.
position_columns <- list(Column1, Column2, Column3, Column4, Column5,
                         Column6, Column7, Column8, Column9, Column10,
                         Column11, Column12, Column13, Column14, Column15)
columns <- vapply(
  position_columns,
  function(residues) length(residues) - sum(residues %in% ""),
  integer(1)
)

residue_count <- nrow(PositionTable)
position_count <- ncol(PositionTable)

# Denominator for every cell: AllMeans[y] * (columns[z] * .01).
expected_counts <- outer(as.vector(AllMeans)[seq_len(residue_count)], columns * .01)

# Numerator: the observed count, or 1 / columns[z] for unobserved residues.
pseudo_counts <- matrix(1 / columns, nrow = residue_count, ncol = position_count,
                        byrow = TRUE)
numerators <- PositionTable
storage.mode(numerators) <- "double"
never_observed <- PositionTable == 0
numerators[never_observed] <- pseudo_counts[never_observed]

# Assign through [] so EPMtable keeps the dimensions and names it inherited
# from PositionTable.
EPMtable[] <- numerators / expected_counts
1083 #here I created the endogenous probability matrix
1084 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
1085
1086
1087
1088
1089
1090 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1091 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1092 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
1093 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
1094 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
1095
# Append `x` to `file` as comma-separated rows without a column header.
append_csv <- function(x, file, row.names = FALSE) {
  write.table(x, file = file, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

# From here on NormalizationScore is a labelled character pair, not a number.
NormalizationScore <- c("Normalization Score", NormalizationScore)

# First output file: SD table followed by the percent table.
append_csv(c("SD Table"), FILENAME)
append_csv(SDtable, FILENAME)
append_csv(c("Percent Table"), FILENAME)
append_csv(PercentTable, FILENAME)

# Labelled copy of the endogenous probability matrix: position header row on
# top, residue label column on the left.
HeaderSD <- -7:7
EPMtableu <- rbind(HeaderSD, EPMtable)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# Second output file: site selectivity, EPM and normalization score.
append_csv("Site Selectivity Matrix", FILENAME2)

# Two header rows: a row of blanks, then "Position", -7 ... 7.
# NOTE(review): `head` shadows the name of utils::head(); kept because the
# variable name is part of what this section defines.
SelectivityHeader <- matrix(c("Position", -7:7), nrow = 1)
head <- matrix(rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(head, SelectivityHeader)
append_csv(SelectivityHeader, FILENAME2)

# Row names are written here so each selectivity row carries its label.
append_csv(SelectivitySheet, FILENAME2, row.names = TRUE)
append_csv(c("Endogenous Probability Matrix"), FILENAME2)
append_csv(EPMtableu, FILENAME2)
append_csv(NormalizationScore, FILENAME2)
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150 #test myself: this script should take in amino acids for each of the 9 positions and give out every single combination of those AAs
1151
1152 #need to do following: fix it so that the accession numbers stay with the substrates,
1153 #also the neg false constant is totaly unphos'd Ys found by FASTA-2-CSV system# uniprot
1154
1155 #HOW MANY: IF THERE'S two aas in each position you get 2^9, so I assume the numbers are:
1156 #(number in position-4)*(number in position -3)*(number in position -2)...=total
1157 # require(rJava)
1158 # require(xlsxjars)
1159 # require(xlsx)
1160 # # require(readxl)
1161
1162 #View(SDtable)
# Body of the labelled SD table: drop the header row (row 1) and the label
# column (column 1), keeping rows 2-21 and columns 2-16.
sd_body_rows <- seq(2, 21)
sd_body_columns <- seq(2, 16)
bareSDs <- SDtable[sd_body_rows, sd_body_columns]

# TRUE wherever a residue/position cell scores above 2.
goodones <- bareSDs > 2
1165
1166 # Positionm7<-which(goodones[,1] %in% TRUE)
1167 # if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))}
1168 # Positionm6<-which(goodones[,2] %in% TRUE)
1169 # if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))}
1170 # Positionm5<-which(goodones[,3] %in% TRUE)
1171 # if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))}
1172 # Positionm4<-which(goodones[,4] %in% TRUE)
1173 # if (length(Positionm4)<2){Positionm4<-bareSDs[,4][order(bareSDs[,4])[1:2]]}
1174 # Positionm3<-which(goodones[,5] %in% TRUE)
1175 # if (length(Positionm3)<2){Positionm3<-bareSDs[,5][order(bareSDs[,5])[1:2]]}
1176 # Positionm2<-which(goodones[,6] %in% TRUE)
1177 # if (length(Positionm2)<2){Positionm2<-bareSDs[,6][order(bareSDs[,6])[1:2]]}
1178 # Positionm1<-which(goodones[,7] %in% TRUE)
1179 # if (length(Positionm1)<2){Positionm1<-bareSDs[,7][order(bareSDs[,7])[1:2]]}
1180 #
1181 # Positiond0<-which(goodones[,8] %in% TRUE)
1182 # if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))}
1183 #
1184 # Positionp1<-which(goodones[,9] %in% TRUE)
1185 # if (length(Positionp1)<2){Positionp1<-bareSDs[,9][order(bareSDs[,9])[1:2]]}
1186 # Positionp2<-which(goodones[,10] %in% TRUE)
1187 # if (length(Positionp2)<2){Positionp2<-bareSDs[,10][order(bareSDs[,10])[1:2]]}
1188 # Positionp3<-which(goodones[,11] %in% TRUE)
1189 # if (length(Positionp3)<2){Positionp3<-bareSDs[,11][order(bareSDs[,11])[1:2]]}
1190 # Positionp4<-which(goodones[,12] %in% TRUE)
1191 # if (length(Positionp4)<2){Positionp4<-bareSDs[,12][order(bareSDs[,12])[1:2]]}
1192 # Positionp5<-which(goodones[,13] %in% TRUE)
1193 # if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))}
1194 # Positionp6<-which(goodones[,14] %in% TRUE)
1195 # if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))}
1196 # Positionp7<-which(goodones[,15] %in% TRUE)
1197 # if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))}
1198
1199
1200
1201
1202 # Positionm7<-which(goodones[,1] %in% TRUE)
1203 # if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))}
1204 # Positionm6<-which(goodones[,2] %in% TRUE)
1205 # if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))}
1206 # Positionm5<-which(goodones[,3] %in% TRUE)
1207 # if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))}
1208 # Positionm4<-which(goodones[,4] %in% TRUE)
1209 # if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))}
1210 # Positionm3<-which(goodones[,5] %in% TRUE)
1211 # if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))}
1212 # Positionm2<-which(goodones[,6] %in% TRUE)
1213 # if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))}
1214 # Positionm1<-which(goodones[,7] %in% TRUE)
1215 # if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))}
1216 #
1217 # Positiond0<-which(goodones[,8] %in% TRUE)
1218 # if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))}
1219 #
1220 # Positionp1<-which(goodones[,9] %in% TRUE)
1221 # if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))}
1222 # Positionp2<-which(goodones[,10] %in% TRUE)
1223 # if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))}
1224 # Positionp3<-which(goodones[,11] %in% TRUE)
1225 # if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))}
1226 # Positionp4<-which(goodones[,12] %in% TRUE)
1227 # if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))}
1228 # Positionp5<-which(goodones[,13] %in% TRUE)
1229 # if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))}
1230 # Positionp6<-which(goodones[,14] %in% TRUE)
1231 # if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))}
1232 # Positionp7<-which(goodones[,15] %in% TRUE)
1233 # if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))}
1234
# Residue letter for each row of bareSDs / goodones (row 1 = A ... row 20 = Y).
aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N",
               "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y")

# Row indices of the residues to use at one motif position.
#
#   column_index    column of bareSDs / goodones (1 = position -7 ... 15 = +7)
#   min_passing     how many residues must exceed the SD threshold for the
#                   thresholded set to be used as-is
#   fallback_count  how many top-scoring residues to use otherwise
#
# The fallback takes row indices straight from order().  Mapping the top
# values back to rows with match() would return the same row twice whenever
# the two best scores are tied, duplicating every generated peptide.
pick_position_rows <- function(column_index, min_passing = 3, fallback_count = 2) {
  passing <- which(goodones[, column_index] %in% TRUE)
  if (length(passing) >= min_passing) {
    return(passing)
  }
  utils::tail(order(bareSDs[, column_index]), fallback_count)
}

# Flanking positions: residues above the threshold when there are at least
# three of them, otherwise the two highest-scoring residues.
Positionm7 <- aa_props2[pick_position_rows(1)]
Positionm6 <- aa_props2[pick_position_rows(2)]
Positionm5 <- aa_props2[pick_position_rows(3)]
Positionm4 <- aa_props2[pick_position_rows(4)]
Positionm3 <- aa_props2[pick_position_rows(5)]
Positionm2 <- aa_props2[pick_position_rows(6)]
Positionm1 <- aa_props2[pick_position_rows(7)]

# Central position: every residue above the threshold is kept.  If none
# passes, fall back to the single best residue so the selection is never
# empty (an empty selection makes the peptide enumeration fail).
Positiond0 <- aa_props2[pick_position_rows(8, min_passing = 1, fallback_count = 1)]

Positionp1 <- aa_props2[pick_position_rows(9)]
Positionp2 <- aa_props2[pick_position_rows(10)]
Positionp3 <- aa_props2[pick_position_rows(11)]
Positionp4 <- aa_props2[pick_position_rows(12)]
Positionp5 <- aa_props2[pick_position_rows(13)]
Positionp6 <- aa_props2[pick_position_rows(14)]
Positionp7 <- aa_props2[pick_position_rows(15)]
1289
1290
1291 # Positionm7<-c("D","H","N","V")
1292 # Positionm6<-c("E","V")
1293 # Positionm5<-c("D","H")
1294 # Positionm4<-c("D","N")
1295 # Positionm3<-c("D","E","F","Q")
1296 # Positionm2<-c("D","N","Q","S")
1297 # Positionm1<-c("F","I","L")
1298 # Positiond0<-c("Y")
1299 # Positionp1<-c("A","E")
1300 # Positionp2<-c("T","S","Q","E")
1301 # Positionp3<-c("V")
1302 # Positionp4<-c("K")
1303 # Positionp5<-c("K")
1304 # Positionp6<-c("K")
1305 # Positionp7<-c("R")
1306 #this is where the amino acids for each position are given. m means minus, p mean plus
1307 ########################################
1308 # ScreenerFilename<-"C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls"
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
# Screener sheet, read without a header so rows can be addressed by number.
screaner <- read.csv(ScreenerFilename, header = FALSE, stringsAsFactors = FALSE)

# Rows of the n-th kinase block: 24 rows starting at row 2, 27, 52, ...
# (rows 1, 26, 51, ... between the blocks are skipped).
kinase_rows <- function(block_number) {
  first_row <- 25 * (block_number - 1) + 2
  first_row:(first_row + 23)
}

AKT    <- screaner[kinase_rows(1), ]
AMPK   <- screaner[kinase_rows(2), ]
CDK1   <- screaner[kinase_rows(3), ]
CHK1   <- screaner[kinase_rows(4), ]
CK2A1  <- screaner[kinase_rows(5), ]
LKB1   <- screaner[kinase_rows(6), ]
MAPK3  <- screaner[kinase_rows(7), ]
MAPK13 <- screaner[kinase_rows(8), ]
MTOR   <- screaner[kinase_rows(9), ]
# Pyk2<-screaner[227:250,]
# Src<-screaner[252:275,]
# Syk<-screaner[277:300,]
# Yes<-screaner[302:325,]

#two questions: why are we doing CDK1 when we already have a bioinformatics page about it?
#two I reran everything and only get 96 positions of interest in the SD table

#Do_You_want_An_Excel_Output_Questionmark<-"NO"
GeneratedPeptidesFile <- "GeneratedPeptidesFile.csv"
1350
1351
1352 # AKT<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 4)
1353 # AMPK<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 5)
1354 # CDK1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 6)
1355 # CHK1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 7)
1356 # CK2A1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 8)
1357 # LKB1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 9)
1358 # MAPK3<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 10)
1359 # MAPK13<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 11)
1360 # MTOR<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 12)
1361 # Pyk2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 13)
1362 # Src<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 14)
1363 # Syk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 15)
1364 # Yes<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 16)
1365 #
1366
1367
# Numeric code of every amino-acid letter (A = 1 ... Y = 20, alphabetical by
# one-letter code).  Each letter is also bound as a variable of its own, as
# the rest of the script may refer to them.
# NOTE(review): this rebinds T (to 17) and F (to 5), so the TRUE/FALSE
# shorthands must not be used in script code after this point.
residue_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
for (residue_code in seq_along(residue_letters)) {
  assign(residue_letters[residue_code], as.numeric(residue_code))
}

# Lookup table letter -> code.  "xY" shares the code of "Y"; "O" gets the
# extra code 21.
aa_props <- as.numeric(seq_along(residue_letters))
names(aa_props) <- residue_letters
aa_props <- c(aa_props, "xY" = Y, "O" = 21)
1391
# Translate the residue letters chosen for each position into their numeric
# codes via aa_props.  The variable numbering is historical: number1..number9
# cover positions -4..+4, number10..number12 cover +5..+7 and
# number13..number15 cover -5..-7.
# NOTE(review): sapply() returns an empty list rather than an empty numeric
# vector when a position has no residues.
letter_to_code <- function(x) aa_props[x]

# positions -7 ... -1
number15 <- sapply(Positionm7, letter_to_code)
number14 <- sapply(Positionm6, letter_to_code)
number13 <- sapply(Positionm5, letter_to_code)
number1  <- sapply(Positionm4, letter_to_code)
number2  <- sapply(Positionm3, letter_to_code)
number3  <- sapply(Positionm2, letter_to_code)
number4  <- sapply(Positionm1, letter_to_code)

# position 0
number5  <- sapply(Positiond0, letter_to_code)

# positions +1 ... +7
number6  <- sapply(Positionp1, letter_to_code)
number7  <- sapply(Positionp2, letter_to_code)
number8  <- sapply(Positionp3, letter_to_code)
number9  <- sapply(Positionp4, letter_to_code)
number10 <- sapply(Positionp5, letter_to_code)
number11 <- sapply(Positionp6, letter_to_code)
number12 <- sapply(Positionp7, letter_to_code)
1407
1408 # number1<-Positionm4
1409 # number2<-Positionm3
1410 # number3<-Positionm2
1411 # number4<-Positionm1
1412 # number5<-Positiond0
1413 # number6<-Positionp1
1414 # number7<-Positionp2
1415 # number8<-Positionp3
1416 # number9<-Positionp4
1417
1418 #############################
1419 ########################################
1420
1421
# Enumerate every combination of the candidate residues chosen for the 15
# motif positions (-7 ... +7).
#
# Results:
#   GeneratedPeptides  character matrix, one peptide per row, one residue
#                      letter per column
#   NumeratedPeptides  numeric matrix of the same shape holding the residue
#                      codes
#   total, count       number of generated peptides
#
# Rows are ordered as nested loops would produce them: position -7 varies
# slowest and position +7 fastest.  Working from an index grid instead of
# fifteen nested `1:length()` loops keeps an empty position from iterating
# over c(1, 0) and indexing out of bounds; it simply yields zero peptides.

# Candidate letters / codes per position.  unlist() plus the coercion drops
# element names and turns an empty selection into a zero-length vector.
position_residues <- lapply(
  list(Positionm7, Positionm6, Positionm5, Positionm4, Positionm3,
       Positionm2, Positionm1, Positiond0, Positionp1, Positionp2,
       Positionp3, Positionp4, Positionp5, Positionp6, Positionp7),
  function(choices) as.character(unlist(choices))
)
position_numbers <- lapply(
  list(number15, number14, number13, number1, number2,
       number3, number4, number5, number6, number7,
       number8, number9, number10, number11, number12),
  function(codes) as.numeric(unlist(codes))
)

# expand.grid() varies its first factor fastest, so build the grid from the
# reversed position list and then put the columns back in -7 ... +7 order.
choice_grid <- expand.grid(rev(lapply(position_residues, seq_along)),
                           KEEP.OUT.ATTRS = FALSE)
choice_grid <- choice_grid[, rev(seq_along(position_residues)), drop = FALSE]

total <- nrow(choice_grid)
count <- total

GeneratedPeptides <- matrix(NA_character_, nrow = total, ncol = 15)
NumeratedPeptides <- matrix(NA_real_, nrow = total, ncol = 15)
for (position in seq_along(position_residues)) {
  picked <- choice_grid[[position]]
  GeneratedPeptides[, position] <- position_residues[[position]][picked]
  NumeratedPeptides[, position] <- position_numbers[[position]][picked]
}
####################################################################
# Score every generated peptide against EPMtableu, the table built earlier in
# this script for the kinase of interest.
ThisKinTable <- EPMtableu
# Prepend a column of 21 ones so that peptide position p is read from table
# column p + 2 below.
TKTcolumn <- c(data = rep(1, times = 21))
TKTcolumn <- as.matrix(TKTcolumn, ncol = 1)
ThisKinTable <- cbind(TKTcolumn, ThisKinTable)

# Table row for each residue code is the code plus one.
ThisKinRows <- NumeratedPeptides + 1
# A row index below 1 would silently shorten a vectorised lookup; fail loudly.
stopifnot(all(is.na(ThisKinRows) | ThisKinRows >= 1))

# Raw score = product of the table entries for positions 1-7 and 9-15
# (position 8 is deliberately left out), multiplied in positional order.
ThisKinGeneratedScores <- rep(1, times = nrow(GeneratedPeptides))
for (pos in c(1:7, 9:15)) {
  ThisKinGeneratedScores <- ThisKinGeneratedScores *
    as.numeric(ThisKinTable[ThisKinRows[, pos], pos + 2])
}

# Normalised score: raw / (raw + 1 / NormalizationScore[2]).
ThisKinGenWeirdScore <- ThisKinGeneratedScores /
  (ThisKinGeneratedScores + 1 / as.numeric(NormalizationScore[2]))
1505
# Score every generated peptide against each off-target kinase table.
#
# For one kinase table, the score of a peptide is the product of
# table[code at position p, column p + 1] over positions 1-7 and 9-15
# (position 8 is skipped), multiplied in positional order.
#
# kinase_table:  table indexed as [row = residue code, column = position + 1]
# peptide_codes: matrix with one peptide per row and 15 code columns
# Returns a numeric vector with one score per peptide.
score_against_screener <- function(kinase_table, peptide_codes) {
  # A row index below 1 would silently shorten a vectorised lookup; fail loudly.
  stopifnot(all(is.na(peptide_codes) | peptide_codes >= 1))
  scores <- rep(1, times = nrow(peptide_codes))
  for (pos in c(1:7, 9:15)) {
    scores <- scores * kinase_table[peptide_codes[, pos], pos + 1]
  }
  scores
}

AKTGeneratedScores <- score_against_screener(AKT, NumeratedPeptides)
AMPKGeneratedScores <- score_against_screener(AMPK, NumeratedPeptides)
CDK1GeneratedScores <- score_against_screener(CDK1, NumeratedPeptides)
CHK1GeneratedScores <- score_against_screener(CHK1, NumeratedPeptides)
CK2A1GeneratedScores <- score_against_screener(CK2A1, NumeratedPeptides)
LKB1GeneratedScores <- score_against_screener(LKB1, NumeratedPeptides)
MAPK3GeneratedScores <- score_against_screener(MAPK3, NumeratedPeptides)
MAPK13GeneratedScores <- score_against_screener(MAPK13, NumeratedPeptides)
MTORGeneratedScores <- score_against_screener(MTOR, NumeratedPeptides)
1588
1589
1590
# Flag, for each off-target kinase, which generated peptides score above that
# kinase's cutoff, then count the flags per peptide in AllActive.
#
# Each cutoff is derived from two cells of the kinase table:
#   Norm   = 1 / table[22, 1]
#   Thresh = table[24, 1]
#   TrueThresh = (Thresh * Norm) / (100 - Thresh)

# Returns an all-zero vector when kinase_name is the kinase being designed for
# (TodaysKinase); otherwise a logical vector marking scores above the cutoff.
flag_offtarget_hits <- function(generated_scores, cutoff, kinase_name) {
  if (TodaysKinase == kinase_name) {
    rep(0, times = nrow(GeneratedPeptides))
  } else {
    unlist(generated_scores) > cutoff
  }
}

AKTNorm <- 1 / as.numeric(AKT[22, 1])
AKTThresh <- as.numeric(AKT[24, 1])
AKTTrueThresh <- (AKTThresh * AKTNorm) / (100 - AKTThresh)
AKTActive <- flag_offtarget_hits(AKTGeneratedScores, AKTTrueThresh, "AKT")

AMPKNorm <- 1 / as.numeric(AMPK[22, 1])
AMPKThresh <- as.numeric(AMPK[24, 1])
AMPKTrueThresh <- (AMPKThresh * AMPKNorm) / (100 - AMPKThresh)
AMPKActive <- flag_offtarget_hits(AMPKGeneratedScores, AMPKTrueThresh, "AMPK")

CDK1Norm <- 1 / as.numeric(CDK1[22, 1])
CDK1Thresh <- as.numeric(CDK1[24, 1])
CDK1TrueThresh <- (CDK1Thresh * CDK1Norm) / (100 - CDK1Thresh)
CDK1Active <- flag_offtarget_hits(CDK1GeneratedScores, CDK1TrueThresh, "CDK1")

CHK1Norm <- 1 / as.numeric(CHK1[22, 1])
CHK1Thresh <- as.numeric(CHK1[24, 1])
CHK1TrueThresh <- (CHK1Thresh * CHK1Norm) / (100 - CHK1Thresh)
CHK1Active <- flag_offtarget_hits(CHK1GeneratedScores, CHK1TrueThresh, "CHK1")

CK2A1Norm <- 1 / as.numeric(CK2A1[22, 1])
CK2A1Thresh <- as.numeric(CK2A1[24, 1])
CK2A1TrueThresh <- (CK2A1Thresh * CK2A1Norm) / (100 - CK2A1Thresh)
CK2A1Active <- flag_offtarget_hits(CK2A1GeneratedScores, CK2A1TrueThresh, "CK2A1")

LKB1Norm <- 1 / as.numeric(LKB1[22, 1])
LKB1Thresh <- as.numeric(LKB1[24, 1])
LKB1TrueThresh <- (LKB1Thresh * LKB1Norm) / (100 - LKB1Thresh)
LKB1Active <- flag_offtarget_hits(LKB1GeneratedScores, LKB1TrueThresh, "LKB1")

MAPK3Norm <- 1 / as.numeric(MAPK3[22, 1])
MAPK3Thresh <- as.numeric(MAPK3[24, 1])
MAPK3TrueThresh <- (MAPK3Thresh * MAPK3Norm) / (100 - MAPK3Thresh)
MAPK3Active <- flag_offtarget_hits(MAPK3GeneratedScores, MAPK3TrueThresh, "MAPK3")

MAPK13Norm <- 1 / as.numeric(MAPK13[22, 1])
MAPK13Thresh <- as.numeric(MAPK13[24, 1])
MAPK13TrueThresh <- (MAPK13Thresh * MAPK13Norm) / (100 - MAPK13Thresh)
MAPK13Active <- flag_offtarget_hits(MAPK13GeneratedScores, MAPK13TrueThresh, "MAPK13")

MTORNorm <- 1 / as.numeric(MTOR[22, 1])
MTORThresh <- as.numeric(MTOR[24, 1])
MTORTrueThresh <- (MTORThresh * MTORNorm) / (100 - MTORThresh)
MTORActive <- flag_offtarget_hits(MTORGeneratedScores, MTORTrueThresh, "MTOR")

# Number of off-target kinases flagged for each generated peptide.
AllActive <- AKTActive + AMPKActive + CDK1Active + CHK1Active + CK2A1Active +
  LKB1Active + MAPK3Active + MAPK13Active + MTORActive
1670
# Assemble the table of generated peptides with their on-target scores and
# off-target flags, then sort it by normalised score (ascending).
Scores <- ThisKinGeneratedScores
ThresholdValues <- ThisKinGenWeirdScore

# Collapse each row of residues into a single 15-character motif string.
# seq_len() keeps this safe when there are no generated peptides.
FullMotifs <- vapply(
  seq_len(nrow(GeneratedPeptides)),
  function(row) paste(GeneratedPeptides[row, 1:15], collapse = ""),
  character(1)
)

PeptidesWithRanks <- cbind.data.frame(FullMotifs, GeneratedPeptides, Scores, ThresholdValues)
PeptidesWithRanks <- cbind.data.frame(
  PeptidesWithRanks, AllActive, AKTActive, AMPKActive, CDK1Active, CHK1Active,
  CK2A1Active, LKB1Active, MAPK3Active, MAPK13Active, MTORActive
)
RanksPeptides <- PeptidesWithRanks[order(PeptidesWithRanks$ThresholdValues, decreasing = FALSE), ]
1684 # PepRankHead<-c(1:9,"Sequence","RPMS","PMS")
1685 # RanksPeptides<-rbind.data.frame(PepRankHead,PeptidesWithRanks)
1686 #head(RanksPeptides)
1687
1688
1689 #now I have to score the negative sequences... for some reason
1690 #write up how we transfect with lipofectamine
1691 #3,4,5 questions
1692
1693 #PAUSED HERE AT 4:50, HOPING THAT FIXING MINERVOTHING SO THAT LEFT SPACES WORKS FIXES A THING. OTHERWISE
1694 #I CHANGED THE MCC TABLE AND NEED TO FINISH IT
1695
# Score the known negative and positive substrates against ThisKinTable.

# Append one extra row of ones to ThisKinTable. SetOfAAs is converted to
# character first so the appended value can be stored in that column.
ThisKinBlanks <- rep(1, times = 17)
ThisKinTable$SetOfAAs <- as.character(ThisKinTable$SetOfAAs)
ThisKinTable <- rbind.data.frame(ThisKinTable, ThisKinBlanks)

# Raw score of one motif: the product of ThisKinTable[code + 1, position + 2]
# over positions 1-7 and 9-15 (position 8 is skipped), multiplied in order.
#
# residues: character vector with one residue per position
# Returns a single number; NA when a residue has no aa_props entry or the motif
# is shorter than 15 positions.
score_substrate_motif <- function(residues) {
  table_rows <- unname(aa_props[residues]) + 1
  raw_score <- 1
  for (pos in c(1:7, 9:15)) {
    raw_score <- raw_score * as.numeric(ThisKinTable[table_rows[pos], pos + 2])
  }
  raw_score
}

# Normalised score on a 0-100 scale: 100 * raw / (raw + 1 / NormalizationScore[2]).
to_percent_score <- function(raw_score) {
  (raw_score / (raw_score + 1 / as.numeric(NormalizationScore[2]))) * 100
}

# Negative substrates: column 2 holds the motif as one string. Split it into
# characters and turn spaces into "O" before looking the residues up.
negative_motifs <- strsplit(as.character(NegativeSubstrateList[, 2]), "")
NegativeScores <- vapply(
  negative_motifs,
  function(chars) score_substrate_motif(gsub(" ", "O", chars)),
  numeric(1)
)
NegativeWeirdScores <- to_percent_score(NegativeScores)

negativesubstrates <- NegativeSubstrateList[, 2]
NegativeWithScores <- cbind(negativesubstrates, as.character(NegativeScores), as.character(NegativeWeirdScores))

#NEED TO HAVE THE NEGATIVE SUBSTRATES BE OUTPUTTED

# Positive substrates: columns 4-18 hold one residue per position; empty cells
# become "O". Every row of ImportedSubstrateList is scored, so the score
# vectors stay aligned with its rows.
PositiveScores <- vapply(
  seq_len(nrow(ImportedSubstrateList)),
  function(v) {
    residues <- unlist(ImportedSubstrateList[v, 4:18])
    score_substrate_motif(gsub("^$", "O", residues))
  },
  numeric(1)
)
PositiveWeirdScores <- to_percent_score(PositiveScores)

positivesubstrates <- ImportedSubstrateList[, 4:18]
positivewithscores <- cbind.data.frame(positivesubstrates, PositiveScores, PositiveWeirdScores)
1762
1763
1764 #write down the transient transfection SOP and what we will be doing with them
1765 #write down the vector names I will be using
1766 #write down something about transforming bacteria and with what
1767
1768 #90% whatevernness
1769 # TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91])
1770 # Senseninetyone<-TPninetyone/nrow(positivesubstrates)
1771 #
1772 # TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91])
1773 # Specninetyone<-TNninetyone/100
1774
1775 #create the MCC table
1776
#create the MCC table
# For each of 120 score thresholds, classify the scored positive and negative
# substrates and derive the usual confusion-matrix statistics.

# Thresholds: 100 ... 1, 0.9 ... 0.1, 0.09 ... 0.01, 0 and -0.1, highest first.
threshold <- sort(c(1:100, (1:9) / 10, (1:9) / 100, 0, -.1), decreasing = TRUE)
print(threshold)

#MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS

# Substrates whose score is NA could not be scored; leave them out of the
# counts instead of letting NA comparisons be counted as hits.
scored_positives <- PositiveWeirdScores[!is.na(PositiveWeirdScores)]
scored_negatives <- NegativeWeirdScores[!is.na(NegativeWeirdScores)]

# Counts are kept as doubles so the four-way product in the MCC denominator
# cannot overflow the integer range.
Truepositives <- vapply(
  threshold,
  function(cutoff) as.numeric(sum(scored_positives >= cutoff)),
  numeric(1)
)
Falsenegatives <- length(scored_positives) - Truepositives
Sensitivity <- Truepositives / (Falsenegatives + Truepositives)

TrueNegatives <- vapply(
  threshold,
  function(cutoff) as.numeric(sum(scored_negatives < cutoff)),
  numeric(1)
)
FalsePositives <- length(scored_negatives) - TrueNegatives
One_Minus_Specificity <- 1 - (TrueNegatives / (FalsePositives + TrueNegatives))

Accuracy <- 100 * (Truepositives + TrueNegatives) /
  (Falsenegatives + FalsePositives + TrueNegatives + Truepositives)
MCC <- ((Truepositives * TrueNegatives) - (Falsenegatives * FalsePositives)) /
  sqrt((Truepositives + Falsenegatives) * (TrueNegatives + FalsePositives) *
         (Truepositives + FalsePositives) * (TrueNegatives + Falsenegatives))
# Despite the name, this is the overall error rate (FP + FN) / all.
EER <- (FalsePositives + Falsenegatives) /
  (Truepositives + TrueNegatives + FalsePositives + Falsenegatives)
Precision <- Truepositives / (Truepositives + FalsePositives)

# F-beta = (1 + beta^2) * P * R / (beta^2 * P + R) for beta = 0.5, 1 and 2.
F_One_Half <- (1.25 * Precision * Sensitivity) / (.25 * Precision + Sensitivity)
F_One <- (2 * Precision * Sensitivity) / (Precision + Sensitivity)
F_Two <- (5 * Precision * Sensitivity) / (4 * Precision + Sensitivity)
FalsePositiveRate <- FalsePositives / (TrueNegatives + FalsePositives)

Characterization <- cbind.data.frame(threshold, Truepositives, Falsenegatives, Sensitivity, TrueNegatives, FalsePositives, One_Minus_Specificity, MCC, EER, Precision, FalsePositiveRate, F_One_Half, F_One, F_Two)
1817
# Label the scored substrate tables and write the result files.

# Put a header row (position numbers 1-15 plus the two score labels) on top of
# the positive-substrate table.
positiveheader <- c(1:15, "RPMS", "PMS")
positivewithscores <- rbind.data.frame(positiveheader, positivewithscores)

negativeheader <- c("Substrate", "RPMS", "PMS")
colnames(NegativeWithScores) <- negativeheader

# FILENAME2: a one-line banner followed by the characterization table, whose
# column names are written as its first data row. Both writes append.
write.table(
  x = c("Characterzation Table"), file = FILENAME2,
  col.names = FALSE, row.names = FALSE, append = TRUE, sep = ","
)
header <- colnames(Characterization)
Characterization <- rbind.data.frame(header, Characterization)
write.table(
  Characterization, file = FILENAME2,
  col.names = FALSE, row.names = FALSE, append = TRUE, sep = ","
)

# FILENAME3: ranked generated peptides with their off-target flags.
# NOTE(review): both calls use append = FALSE, so the banner written by the
# first call is replaced by the table written by the second - confirm whether
# the banner was meant to stay in the file.
write.table(
  x = "Off Target Kinase activity (your kinase of interest should have zeros here because it is ON-target)",
  file = FILENAME3, append = FALSE, row.names = FALSE, col.names = TRUE, sep = ","
)
write.table(
  RanksPeptides,
  file = FILENAME3, append = FALSE, row.names = FALSE, col.names = TRUE, sep = ","
)

# FILENAME4: the imported substrate list with its normalised scores appended.
PositivesWithScores <- as.data.frame(ImportedSubstrateList)
PositiveOutput <- cbind(PositivesWithScores, PositiveWeirdScores)
write.table(PositiveOutput, file = FILENAME4, sep = ",", na = "", row.names = FALSE)

# FILENAME5: the negative substrate list with its normalised scores appended.
NegativeOutput <- as.data.frame(NegativeSubstrateList)
NegativeOutput <- cbind(NegativeOutput, NegativeWeirdScores)
write.table(NegativeOutput, file = FILENAME5, sep = ",", na = "", row.names = FALSE)