comparison KT part 1/Kinatest-R part 1.R @ 0:27d804c7b43f draft default tip

Uploaded
author jfb
date Sun, 16 Sep 2018 18:03:15 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:27d804c7b43f
# Switch warnings off for the run. The previous warning level is kept in
# `oldw` (presumably so it can be restored later in the script).
oldw <- options(warn = -1)$warn

# Input tables. The third one is read without a header row, so its first
# line is treated as data.
ImportedSubstrateList <- read.csv("input1", stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv("input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(
  "input3",
  header = FALSE,
  stringsAsFactors = FALSE
)

ScreenerFilename <- "screener"

# Names of the three result files.
FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"
15
16
17
# Transpose the background table, drop the first row of the transposed
# result (i.e. the first column of the CSV) and discard every row that
# contains an NA.
#
# `[-1, , drop = FALSE]` replaces `[2:nrow(x), ]`: the old form collapsed the
# matrix to a plain vector when only one row was left (breaking every later
# `[, k]` access) and counted backwards (2, 1) when there was a single row.
SubstrateBackgroundFrequency <- t(SubstrateBackgroundFrequency)
SubstrateBackgroundFrequency <- SubstrateBackgroundFrequency[-1, , drop = FALSE]
# `Sub` is kept as a separate name in case later code still refers to it.
Sub <- na.omit(SubstrateBackgroundFrequency)
SubstrateBackgroundFrequency <- Sub
23
# The first trailing command-line argument is stored as `TodaysKinase`;
# it is NA when the script is started without arguments.
args <- commandArgs(trailingOnly = TRUE)
TodaysKinase <- as.character(args[1])

# Names for the result tables.
# NOTE(review): `SDtable` is reassigned to a numeric matrix when the SD table
# is built further down, so this string is lost from that point on - confirm
# nothing expects it to still be a name afterwards.
OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
36
37
38
# Build the substrate motif matrix: one row per imported substrate and one
# column per motif position (columns 4-18 of the import, 15 in total).
# The first data row of the import is skipped, exactly as the original
# `for (i in 2:nrow(...))` loop did.
#
# Working column-wise instead of row-by-row avoids the `2:nrow()` hazard
# (with a single data row the old loop ran for i = 2, 1 and failed with a
# subscript error) and gives an empty 0 x 15 matrix in that case instead.
motif_columns <- ImportedSubstrateList[-1, 4:18, drop = FALSE]
substrates <- matrix(
  unlist(lapply(motif_columns, as.character), use.names = FALSE),
  ncol = 15
)

# The central position (column 8) is always forced to "Y".
if (nrow(substrates) > 0L) {
  substrates[, 8] <- "Y"
}
51
52 # SpacesToOs<-c(""="O",)
53 # substrates<-SpacesToOs[substrates]
54
# Leftover inspection of the last background row (column 2). It does not
# feed any later computation; it is kept so that top-level output of the
# script stays the same.
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# Background mean and standard deviation per amino acid.
# Column 2 of the background table feeds the "A" statistics, column 3 the
# "C" statistics, and so on up to column 21 for "Y".
background_residues <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                         "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
background_columns <- seq_along(background_residues) + 1L

# Whole-column indexing (`[, k]`) replaces `[1:nrow(x), k]`, which indexed
# rows 1 and 0 when the table was empty.
AllMeans <- vapply(
  background_columns,
  function(k) mean(as.numeric(SubstrateBackgroundFrequency[, k]), na.rm = TRUE),
  numeric(1)
)

#this is subbackfreq SDs
AllSDs <- vapply(
  background_columns,
  function(k) sd(as.numeric(SubstrateBackgroundFrequency[, k]), na.rm = TRUE),
  numeric(1)
)

# The per-residue scalars Amean ... Ymean and Asd ... Ysd are still created
# because the SD-table step further down reads them by name.
# invisible() stops the returned environment from being printed.
invisible(list2env(
  setNames(
    as.list(c(AllMeans, AllSDs)),
    c(paste0(background_residues, "mean"), paste0(background_residues, "sd"))
  ),
  envir = environment()
))

SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
106
#create the percent table
# PercentTable[r, p] is the percentage of the non-blank entries in motif
# column p of `substrates` that equal residue r. Blank cells ("") are left
# out of the denominator; a column made up only of blanks yields NaN.
#
# This replaces the hand-written A1 ... Y15 scalars and the AllAs ... YllYs
# row blocks with one computation. Those scratch variables (and Column1-15,
# spaces1-15) are no longer created here; the positions-table step further
# down assigns them again before using them.
#
# Row order:
#A C D E F G H I K L M N P Q R S T V W Y
percent_residues <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                      "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
motif_positions <- seq_len(15)

#this is substrate percents
PercentTable <- vapply(
  motif_positions,
  function(pos) {
    column <- substrates[, pos]
    filled <- length(column) - sum(column %in% "")
    hits <- vapply(
      percent_residues,
      function(aa) sum(column %in% aa),
      numeric(1),
      USE.NAMES = FALSE
    )
    hits / filled
  },
  numeric(length(percent_residues))
)

# The original table inherited the column labels A1 ... A15 from its first
# row block; they are kept because they become the column names of the data
# frame built from this table later on.
colnames(PercentTable) <- paste0("A", motif_positions)

PercentTable <- PercentTable * 100
503
#create the SD table
# Every percentage is rescaled against the background of its own residue:
# (percentage - background mean) / background SD.
# AllMeans and AllSDs have one entry per row of PercentTable, so recycling
# down the columns pairs row r with AllMeans[r] and AllSDs[r].
# unname() removes the labels inherited from PercentTable, leaving a plain
# matrix without dimnames.
SDtable <- unname((PercentTable - AllMeans) / AllSDs)
529
530
# Row labels for the output tables: a header cell followed by the 20 residue
# letters in the same order as the rows of the percent and SD tables.
SetOfAAs <- c("Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# For each of the 15 motif positions, count the substrates whose residue at
# that position has an SD-table score above 2.
#
# Changes from the original double loop:
#  * which() ignores NA/NaN scores (zero or undefined background SD, or a
#    position with no residues); `if (SDtable[j, i] > 2)` stopped the script
#    with "missing value where TRUE/FALSE needed" in that case.
#  * %in% is used instead of ==, so an NA cell in `substrates` is simply not
#    counted instead of turning the whole count into NA. This matches how
#    the percent table counts residues.
SumOfSigmaAAs <- vapply(
  seq_len(15),
  function(pos) {
    # + 1L skips the leading "Letter" entry of SetOfAAs.
    enriched <- SetOfAAs[which(SDtable[, pos] > 2) + 1L]
    sum(substrates[, pos] %in% enriched)
  },
  numeric(1)
)
547
# Number of non-blank entries at every motif position.
#
# The original only computed positions 1-9 although the motif has 15
# columns, so the selectivity calculation below (which runs over 1:15) read
# NA for positions 10-15. All columns of `substrates` are covered now; the
# first nine values are unchanged.
# Blanks are detected with %in%, the same way the expected-count step does,
# so an NA cell no longer turns the count into NA.
AAsAtPositions <- vapply(
  seq_len(ncol(substrates)),
  function(pos) {
    column <- substrates[, pos]
    length(column) - sum(column %in% "")
  },
  integer(1)
)
558 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
559 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
560 # length(substrates[,9]))
561
# Expected count per motif position: the background means (in percent) of
# all residues whose SD-table score at that position is above 2 are summed,
# then scaled by the number of non-blank entries at the position.
#
# which() ignores NA/NaN scores; the original `if (SDtable[j, i] > 2)`
# stopped the script with "missing value where TRUE/FALSE needed" when a
# score was undefined.
SumOfExpectedSigmaAAs <- vapply(
  seq_len(15),
  function(pos) {
    enriched <- which(SDtable[, pos] > 2)
    column <- substrates[, pos]
    filled <- length(column) - sum(column %in% "")
    sum(AllMeans[enriched]) * filled / 100
  },
  numeric(1)
)
574
#SelectivityRow<-SumOfSigmaAAs/SumOfExpectedSigmaAAs

# Selectivity per motif position:
#   (observed / expected) * (observed / non-blank entries at the position)
# The inputs are indexed explicitly by position, so a vector shorter than 15
# contributes NA for the missing positions instead of being recycled.
motif_index <- seq_len(15)
observed_counts <- SumOfSigmaAAs[motif_index]
expected_counts <- SumOfExpectedSigmaAAs[motif_index]
filled_counts <- AAsAtPositions[motif_index]

SelectivityRow <- (observed_counts / expected_counts) *
  (observed_counts / filled_counts)

# Three-row summary; the row names come from the argument names.
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)
584
# Turn the label vector into a one-column matrix so it can be used as the
# first column of both output tables.
SetOfAAs <- matrix(SetOfAAs, ncol = 1)

# Header row shared by both tables: motif positions -7 ... 7.
HeaderSD <- -7:7

# SD table: keep the bare numeric matrix as `SDtableu`, then put the header
# row on top and the residue labels in front.
SDtableu <- SDtable
SDtable <- rbind(HeaderSD, SDtableu)
rownames(SDtable) <- NULL
SDtable <- data.frame(SetOfAAs, SDtable)

# Percent table: same layout.
PercentTable <- rbind(HeaderSD, PercentTable)
rownames(PercentTable) <- NULL
PercentTable <- data.frame(SetOfAAs, PercentTable)
# Columns 34 and 35 of the background table are converted to numbers and
# entries that are not numeric are dropped.
# NOTE(review): judging by the variable names these columns hold Y and pY
# counts - confirm against the input file layout.
column_34_values <- as.numeric(SubstrateBackgroundFrequency[, 34])
numberofY <- column_34_values[!is.na(column_34_values)]

column_35_values <- as.numeric(SubstrateBackgroundFrequency[, 35])
numberofPY <- column_35_values[!is.na(column_35_values)]

# Ratio of the two column totals.
NormalizationScore <- sum(numberofPY) / sum(numberofY)
602
603 # positions<-matrix(data = NA, nrow=20,ncol = 15)
604 #
605 # #column1
606 #
607 # for (q in 1:15) {
608 # sA<-sum(substrates[,i]=="A")
609 # positions[1,i]<-sA
610 # sC<-sum(substrates[,i]=="C")
611 # positions[2,i]<-sC
612 # sD<-sum(substrates[,i]=="D")
613 # positions[3,i]<-sD
614 # sE<-sum(substrates[,i]=="E")
615 # positions[4,i]<-sE
616 # sF<-sum(substrates[,i]=="F")
617 # sG<-sum(substrates[,i]=="G")
618 # sH<-sum(substrates[,i]=="H")
619 # sI<-sum(substrates[,i]=="I")
620 # sK<-sum(substrates[,i]=="K")
621 # sL<-sum(substrates[,i]=="L")
622 # sM<-sum(substrates[,i]=="M")
623 # sN<-sum(substrates[,i]=="N")
624 # sP<-sum(substrates[,i]=="P")
625 # sQ<-sum(substrates[,i]=="Q")
626 # sR<-sum(substrates[,i]=="R")
627 # sS<-sum(substrates[,i]=="S")
628 # sT<-sum(substrates[,i]=="T")
629 # sV<-sum(substrates[,i]=="V")
630 # sW<-sum(substrates[,i]=="W")
631 # sY<-sum(substrates[,i]=="Y")
632 # positions[5,i]<-sF
633 # positions[6,i]<-sG
634 # positions[7,i]<-sH
635 # positions[8,i]<-sI
636 # positions[9,i]<-sK
637 # positions[10,i]<-sL
638 # positions[11,i]<-sM
639 # positions[12,i]<-sN
640 # positions[13,i]<-sP
641 # positions[14,i]<-sQ
642 # positions[15,i]<-sR
643 # positions[16,i]<-sS
644 # positions[17,i]<-sT
645 # positions[18,i]<-sV
646 # positions[19,i]<-sW
647 # positions[20,i]<-sY
648 # }
649
# Here I create the positions table, which is needed for the endogenous probability matrix. It simply answers "how many times did an AA show up in this spot?"
651 if (6==6){
652 Column1<-substrates[,1]
653 Column2<-substrates[,2]
654 Column3<-substrates[,3]
655 Column4<-substrates[,4]
656 Column5<-substrates[,5]
657 Column6<-substrates[,6]
658 Column7<-substrates[,7]
659 Column8<-substrates[,8]
660 Column9<-substrates[,9]
661 Column10<-substrates[,10]
662 Column11<-substrates[,11]
663 Column12<-substrates[,12]
664 Column13<-substrates[,13]
665 Column14<-substrates[,14]
666 Column15<-substrates[,15]
667
668 spaces1<-sum((Column1%in% ""))
669 spaces2<-sum(Column2%in% "")
670 spaces3<-sum(Column3%in% "")
671 spaces4<-sum(Column4%in% "")
672 spaces5<-sum(Column5%in% "")
673 spaces6<-sum(Column6%in% "")
674 spaces7<-sum(Column7%in% "")
675 spaces8<-sum(Column8%in% "")
676 spaces9<-sum(Column9%in% "")
677 spaces10<-sum(Column10%in% "")
678 spaces11<-sum(Column11%in% "")
679 spaces12<-sum(Column12%in% "")
680 spaces13<-sum(Column13%in% "")
681 spaces14<-sum(Column14%in% "")
682 spaces15<-sum(Column15%in% "")
683
684 A1<-sum(Column1 %in% "A")
685 A2<-sum(Column2 %in% "A")
686 A3<-sum(Column3 %in% "A")
687 A4<-sum(Column4 %in% "A")
688 A5<-sum(Column5 %in% "A")
689 A6<-sum(Column6 %in% "A")
690 A7<-sum(Column7 %in% "A")
691 A8<-sum(Column8 %in% "A")
692 A9<-sum(Column9 %in% "A")
693 A10<-sum(Column10 %in% "A")
694 A11<-sum(Column11 %in% "A")
695 A12<-sum(Column12 %in% "A")
696 A13<-sum(Column13 %in% "A")
697 A14<-sum(Column14 %in% "A")
698 A15<-sum(Column15 %in% "A")
699 AllAs<-cbind(A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15)
700
701 C1<-sum(Column1 %in% "C")
702 C2<-sum(Column2 %in% "C")
703 C3<-sum(Column3 %in% "C")
704 C4<-sum(Column4 %in% "C")
705 C5<-sum(Column5 %in% "C")
706 C6<-sum(Column6 %in% "C")
707 C7<-sum(Column7 %in% "C")
708 C8<-sum(Column8 %in% "C")
709 C9<-sum(Column9 %in% "C")
710 C10<-sum(Column10 %in% "C")
711 C11<-sum(Column11 %in% "C")
712 C12<-sum(Column12 %in% "C")
713 C13<-sum(Column13 %in% "C")
714 C14<-sum(Column14 %in% "C")
715 C15<-sum(Column15 %in% "C")
716 CllCs<-cbind(C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15)
717
718 D1<-sum(Column1 %in% "D")
719 D2<-sum(Column2 %in% "D")
720 D3<-sum(Column3 %in% "D")
721 D4<-sum(Column4 %in% "D")
722 D5<-sum(Column5 %in% "D")
723 D6<-sum(Column6 %in% "D")
724 D7<-sum(Column7 %in% "D")
725 D8<-sum(Column8 %in% "D")
726 D9<-sum(Column9 %in% "D")
727 D10<-sum(Column10 %in% "D")
728 D11<-sum(Column11 %in% "D")
729 D12<-sum(Column12 %in% "D")
730 D13<-sum(Column13 %in% "D")
731 D14<-sum(Column14 %in% "D")
732 D15<-sum(Column15 %in% "D")
733 DllDs<-cbind(D1,D2,D3,D4,D5,D6,D7,D8,D9,D10,D11,D12,D13,D14,D15)
734
735 E1<-sum(Column1 %in% "E")
736 E2<-sum(Column2 %in% "E")
737 E3<-sum(Column3 %in% "E")
738 E4<-sum(Column4 %in% "E")
739 E5<-sum(Column5 %in% "E")
740 E6<-sum(Column6 %in% "E")
741 E7<-sum(Column7 %in% "E")
742 E8<-sum(Column8 %in% "E")
743 E9<-sum(Column9 %in% "E")
744 E10<-sum(Column10 %in% "E")
745 E11<-sum(Column11 %in% "E")
746 E12<-sum(Column12 %in% "E")
747 E13<-sum(Column13 %in% "E")
748 E14<-sum(Column14 %in% "E")
749 E15<-sum(Column15 %in% "E")
750 EllEs<-cbind(E1,E2,E3,E4,E5,E6,E7,E8,E9,E10,E11,E12,E13,E14,E15)
751
752 F1<-sum(Column1 %in% "F")
753 F2<-sum(Column2 %in% "F")
754 F3<-sum(Column3 %in% "F")
755 F4<-sum(Column4 %in% "F")
756 F5<-sum(Column5 %in% "F")
757 F6<-sum(Column6 %in% "F")
758 F7<-sum(Column7 %in% "F")
759 F8<-sum(Column8 %in% "F")
760 F9<-sum(Column9 %in% "F")
761 F10<-sum(Column10 %in% "F")
762 F11<-sum(Column11 %in% "F")
763 F12<-sum(Column12 %in% "F")
764 F13<-sum(Column13 %in% "F")
765 F14<-sum(Column14 %in% "F")
766 F15<-sum(Column15 %in% "F")
767 FllFs<-cbind(F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15)
768
769 G1<-sum(Column1 %in% "G")
770 G2<-sum(Column2 %in% "G")
771 G3<-sum(Column3 %in% "G")
772 G4<-sum(Column4 %in% "G")
773 G5<-sum(Column5 %in% "G")
774 G6<-sum(Column6 %in% "G")
775 G7<-sum(Column7 %in% "G")
776 G8<-sum(Column8 %in% "G")
777 G9<-sum(Column9 %in% "G")
778 G10<-sum(Column10 %in% "G")
779 G11<-sum(Column11 %in% "G")
780 G12<-sum(Column12 %in% "G")
781 G13<-sum(Column13 %in% "G")
782 G14<-sum(Column14 %in% "G")
783 G15<-sum(Column15 %in% "G")
784 GllGs<-cbind(G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,G11,G12,G13,G14,G15)
785
786 H1<-sum(Column1 %in% "H")
787 H2<-sum(Column2 %in% "H")
788 H3<-sum(Column3 %in% "H")
789 H4<-sum(Column4 %in% "H")
790 H5<-sum(Column5 %in% "H")
791 H6<-sum(Column6 %in% "H")
792 H7<-sum(Column7 %in% "H")
793 H8<-sum(Column8 %in% "H")
794 H9<-sum(Column9 %in% "H")
795 H10<-sum(Column10 %in% "H")
796 H11<-sum(Column11 %in% "H")
797 H12<-sum(Column12 %in% "H")
798 H13<-sum(Column13 %in% "H")
799 H14<-sum(Column14 %in% "H")
800 H15<-sum(Column15 %in% "H")
801 HllHs<-cbind(H1,H2,H3,H4,H5,H6,H7,H8,H9,H10,H11,H12,H13,H14,H15)
802
803 I1<-sum(Column1 %in% "I")
804 I2<-sum(Column2 %in% "I")
805 I3<-sum(Column3 %in% "I")
806 I4<-sum(Column4 %in% "I")
807 I5<-sum(Column5 %in% "I")
808 I6<-sum(Column6 %in% "I")
809 I7<-sum(Column7 %in% "I")
810 I8<-sum(Column8 %in% "I")
811 I9<-sum(Column9 %in% "I")
812 I10<-sum(Column10 %in% "I")
813 I11<-sum(Column11 %in% "I")
814 I12<-sum(Column12 %in% "I")
815 I13<-sum(Column13 %in% "I")
816 I14<-sum(Column14 %in% "I")
817 I15<-sum(Column15 %in% "I")
818 IllIs<-cbind(I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,I14,I15)
819
820 K1<-sum(Column1 %in% "K")
821 K2<-sum(Column2 %in% "K")
822 K3<-sum(Column3 %in% "K")
823 K4<-sum(Column4 %in% "K")
824 K5<-sum(Column5 %in% "K")
825 K6<-sum(Column6 %in% "K")
826 K7<-sum(Column7 %in% "K")
827 K8<-sum(Column8 %in% "K")
828 K9<-sum(Column9 %in% "K")
829 K10<-sum(Column10 %in% "K")
830 K11<-sum(Column11 %in% "K")
831 K12<-sum(Column12 %in% "K")
832 K13<-sum(Column13 %in% "K")
833 K14<-sum(Column14 %in% "K")
834 K15<-sum(Column15 %in% "K")
835 KllKs<-cbind(K1,K2,K3,K4,K5,K6,K7,K8,K9,K10,K11,K12,K13,K14,K15)
836
837 L1<-sum(Column1 %in% "L")
838 L2<-sum(Column2 %in% "L")
839 L3<-sum(Column3 %in% "L")
840 L4<-sum(Column4 %in% "L")
841 L5<-sum(Column5 %in% "L")
842 L6<-sum(Column6 %in% "L")
843 L7<-sum(Column7 %in% "L")
844 L8<-sum(Column8 %in% "L")
845 L9<-sum(Column9 %in% "L")
846 L10<-sum(Column10 %in% "L")
847 L11<-sum(Column11 %in% "L")
848 L12<-sum(Column12 %in% "L")
849 L13<-sum(Column13 %in% "L")
850 L14<-sum(Column14 %in% "L")
851 L15<-sum(Column15 %in% "L")
852 LllLs<-cbind(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15)
853
854 M1<-sum(Column1 %in% "M")
855 M2<-sum(Column2 %in% "M")
856 M3<-sum(Column3 %in% "M")
857 M4<-sum(Column4 %in% "M")
858 M5<-sum(Column5 %in% "M")
859 M6<-sum(Column6 %in% "M")
860 M7<-sum(Column7 %in% "M")
861 M8<-sum(Column8 %in% "M")
862 M9<-sum(Column9 %in% "M")
863 M10<-sum(Column10 %in% "M")
864 M11<-sum(Column11 %in% "M")
865 M12<-sum(Column12 %in% "M")
866 M13<-sum(Column13 %in% "M")
867 M14<-sum(Column14 %in% "M")
868 M15<-sum(Column15 %in% "M")
869 MllMs<-cbind(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,M13,M14,M15)
870
871 N1<-sum(Column1 %in% "N")
872 N2<-sum(Column2 %in% "N")
873 N3<-sum(Column3 %in% "N")
874 N4<-sum(Column4 %in% "N")
875 N5<-sum(Column5 %in% "N")
876 N6<-sum(Column6 %in% "N")
877 N7<-sum(Column7 %in% "N")
878 N8<-sum(Column8 %in% "N")
879 N9<-sum(Column9 %in% "N")
880 N10<-sum(Column10 %in% "N")
881 N11<-sum(Column11 %in% "N")
882 N12<-sum(Column12 %in% "N")
883 N13<-sum(Column13 %in% "N")
884 N14<-sum(Column14 %in% "N")
885 N15<-sum(Column15 %in% "N")
886 NllNs<-cbind(N1,N2,N3,N4,N5,N6,N7,N8,N9,N10,N11,N12,N13,N14,N15)
887
888 P1<-sum(Column1 %in% "P")
889 P2<-sum(Column2 %in% "P")
890 P3<-sum(Column3 %in% "P")
891 P4<-sum(Column4 %in% "P")
892 P5<-sum(Column5 %in% "P")
893 P6<-sum(Column6 %in% "P")
894 P7<-sum(Column7 %in% "P")
895 P8<-sum(Column8 %in% "P")
896 P9<-sum(Column9 %in% "P")
897 P10<-sum(Column10 %in% "P")
898 P11<-sum(Column11 %in% "P")
899 P12<-sum(Column12 %in% "P")
900 P13<-sum(Column13 %in% "P")
901 P14<-sum(Column14 %in% "P")
902 P15<-sum(Column15 %in% "P")
903 PllPs<-cbind(P1,P2,P3,P4,P5,P6,P7,P8,P9,P10,P11,P12,P13,P14,P15)
904
905 Q1<-sum(Column1 %in% "Q")
906 Q2<-sum(Column2 %in% "Q")
907 Q3<-sum(Column3 %in% "Q")
908 Q4<-sum(Column4 %in% "Q")
909 Q5<-sum(Column5 %in% "Q")
910 Q6<-sum(Column6 %in% "Q")
911 Q7<-sum(Column7 %in% "Q")
912 Q8<-sum(Column8 %in% "Q")
913 Q9<-sum(Column9 %in% "Q")
914 Q10<-sum(Column10 %in% "Q")
915 Q11<-sum(Column11 %in% "Q")
916 Q12<-sum(Column12 %in% "Q")
917 Q13<-sum(Column13 %in% "Q")
918 Q14<-sum(Column14 %in% "Q")
919 Q15<-sum(Column15 %in% "Q")
920 QllQs<-cbind(Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15)
921
922 R1<-sum(Column1 %in% "R")
923 R2<-sum(Column2 %in% "R")
924 R3<-sum(Column3 %in% "R")
925 R4<-sum(Column4 %in% "R")
926 R5<-sum(Column5 %in% "R")
927 R6<-sum(Column6 %in% "R")
928 R7<-sum(Column7 %in% "R")
929 R8<-sum(Column8 %in% "R")
930 R9<-sum(Column9 %in% "R")
931 R10<-sum(Column10 %in% "R")
932 R11<-sum(Column11 %in% "R")
933 R12<-sum(Column12 %in% "R")
934 R13<-sum(Column13 %in% "R")
935 R14<-sum(Column14 %in% "R")
936 R15<-sum(Column15 %in% "R")
937 RllRs<-cbind(R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,R13,R14,R15)
938
939 S1<-sum(Column1 %in% "S")
940 S2<-sum(Column2 %in% "S")
941 S3<-sum(Column3 %in% "S")
942 S4<-sum(Column4 %in% "S")
943 S5<-sum(Column5 %in% "S")
944 S6<-sum(Column6 %in% "S")
945 S7<-sum(Column7 %in% "S")
946 S8<-sum(Column8 %in% "S")
947 S9<-sum(Column9 %in% "S")
948 S10<-sum(Column10 %in% "S")
949 S11<-sum(Column11 %in% "S")
950 S12<-sum(Column12 %in% "S")
951 S13<-sum(Column13 %in% "S")
952 S14<-sum(Column14 %in% "S")
953 S15<-sum(Column15 %in% "S")
954 SllSs<-cbind(S1,S2,S3,S4,S5,S6,S7,S8,S9,S10,S11,S12,S13,S14,S15)
955
956 T1<-sum(Column1 %in% "T")
957 T2<-sum(Column2 %in% "T")
958 T3<-sum(Column3 %in% "T")
959 T4<-sum(Column4 %in% "T")
960 T5<-sum(Column5 %in% "T")
961 T6<-sum(Column6 %in% "T")
962 T7<-sum(Column7 %in% "T")
963 T8<-sum(Column8 %in% "T")
964 T9<-sum(Column9 %in% "T")
965 T10<-sum(Column10 %in% "T")
966 T11<-sum(Column11 %in% "T")
967 T12<-sum(Column12 %in% "T")
968 T13<-sum(Column13 %in% "T")
969 T14<-sum(Column14 %in% "T")
970 T15<-sum(Column15 %in% "T")
971 TllTs<-cbind(T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15)
972
973 V1<-sum(Column1 %in% "V")
974 V2<-sum(Column2 %in% "V")
975 V3<-sum(Column3 %in% "V")
976 V4<-sum(Column4 %in% "V")
977 V5<-sum(Column5 %in% "V")
978 V6<-sum(Column6 %in% "V")
979 V7<-sum(Column7 %in% "V")
980 V8<-sum(Column8 %in% "V")
981 V9<-sum(Column9 %in% "V")
982 V10<-sum(Column10 %in% "V")
983 V11<-sum(Column11 %in% "V")
984 V12<-sum(Column12 %in% "V")
985 V13<-sum(Column13 %in% "V")
986 V14<-sum(Column14 %in% "V")
987 V15<-sum(Column15 %in% "V")
988 VllVs<-cbind(V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15)
989
990 W1<-sum(Column1 %in% "W")
991 W2<-sum(Column2 %in% "W")
992 W3<-sum(Column3 %in% "W")
993 W4<-sum(Column4 %in% "W")
994 W5<-sum(Column5 %in% "W")
995 W6<-sum(Column6 %in% "W")
996 W7<-sum(Column7 %in% "W")
997 W8<-sum(Column8 %in% "W")
998 W9<-sum(Column9 %in% "W")
999 W10<-sum(Column10 %in% "W")
1000 W11<-sum(Column11 %in% "W")
1001 W12<-sum(Column12 %in% "W")
1002 W13<-sum(Column13 %in% "W")
1003 W14<-sum(Column14 %in% "W")
1004 W15<-sum(Column15 %in% "W")
1005 WllWs<-cbind(W1,W2,W3,W4,W5,W6,W7,W8,W9,W10,W11,W12,W13,W14,W15)
1006
1007 Y1<-sum(Column1 %in% "Y")
1008 Y2<-sum(Column2 %in% "Y")
1009 Y3<-sum(Column3 %in% "Y")
1010 Y4<-sum(Column4 %in% "Y")
1011 Y5<-sum(Column5 %in% "Y")
1012 Y6<-sum(Column6 %in% "Y")
1013 Y7<-sum(Column7 %in% "Y")
1014 Y8<-sum(Column8 %in% "Y")
1015 Y9<-sum(Column9 %in% "Y")
1016 Y10<-sum(Column10 %in% "Y")
1017 Y11<-sum(Column11 %in% "Y")
1018 Y12<-sum(Column12 %in% "Y")
1019 Y13<-sum(Column13 %in% "Y")
1020 Y14<-sum(Column14 %in% "Y")
1021 Y15<-sum(Column15 %in% "Y")
1022 YllYs<-cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7,Y8,Y9,Y10,Y11,Y12,Y13,Y14,Y15)
1023 PositionTable<-rbind(AllAs,CllCs,DllDs,EllEs,FllFs,GllGs,HllHs,IllIs,KllKs,LllLs,MllMs,NllNs,PllPs,QllQs,RllRs,SllSs,TllTs,VllVs,WllWs,YllYs)
1024 }
1025 #endogenous prob matrix is AA position over subbackfreqmean
1026 dim(PositionTable)
1027 EPMtable<-PositionTable
1028 # EPMtable[1,]<-(PositionTable[1,]/(PositionTable[1,]*.01*Amean))
1029 # EPMtable[2,]<-(PositionTable[2,]/(PositionTable[2,]*.01*Cmean))
1030 # EPMtable[3,]<-(PositionTable[3,]/(PositionTable[3,]*.01*Dmean))
1031 # EPMtable[4,]<-(PositionTable[4,]/(PositionTable[4,]*.01*Emean))
1032 # EPMtable[5,]<-(PositionTable[5,]/(PositionTable[5,]*.01*Fmean))
1033 # EPMtable[6,]<-(PositionTable[6,]/(PositionTable[6,]*.01*Gmean))
1034 # EPMtable[7,]<-(PositionTable[7,]/(PositionTable[7,]*.01*Hmean))
1035 # EPMtable[8,]<-(PositionTable[8,]/(PositionTable[8,]*.01*Imean))
1036 # EPMtable[9,]<-(PositionTable[9,]/(PositionTable[9,]*.01*Kmean))
1037 # EPMtable[10,]<-(PositionTable[10,]/(PositionTable[10,]*.01*Lmean))
1038 # EPMtable[11,]<-(PositionTable[11,]/(PositionTable[11,]*.01*Mmean))
1039 # EPMtable[12,]<-(PositionTable[12,]/(PositionTable[12,]*.01*Nmean))
1040 # EPMtable[13,]<-(PositionTable[13,]/(PositionTable[13,]*.01*Pmean))
1041 # EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean))
1042 # EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean))
1043 # EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean))
1044 # EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean))
1045 # EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean))
1046 # EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean))
1047 # EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean))
1048
# Number of non-blank entries at each of the 15 motif positions: the length of
# the position's residue vector minus the count of empty strings in it.
# Note: `== ""` propagates NA, so a position containing a missing value gets
# an NA count.
columns <- vapply(
  list(Column1, Column2, Column3, Column4, Column5,
       Column6, Column7, Column8, Column9, Column10,
       Column11, Column12, Column13, Column14, Column15),
  function(Residues) length(Residues) - sum(Residues == ""),
  integer(1)
)
1064
# Fill the endogenous probability matrix one motif position at a time.
# Each cell is a numerator divided by columns[z] * .01 * AllMeans[y]:
#   - where the residue was observed, the numerator is its count;
#   - where it was never observed, the numerator is 1 / columns[z].
ResidueRows <- seq_len(20)
for (z in seq_len(15)) {
  ObservedCounts <- PositionTable[ResidueRows, z]
  Denominators <- columns[z] * .01 * AllMeans[ResidueRows]
  Numerators <- ifelse(ObservedCounts > 0, ObservedCounts, 1 / columns[z])
  EPMtable[ResidueRows, z] <- Numerators / Denominators
}
1075 #here I created the endogenous probability matrix
1076 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
1077
1078
1079
1080
1081
1082 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1083 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
1084 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
1085 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
1086 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
1087
# Every output section is appended to its file as comma-separated rows with no
# column header; only the row-name setting varies between sections.
AppendCsv <- function(x, path, row.names = FALSE) {
  write.table(x, file = path, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

# Prefix the score with its label so the written row is self-describing.
NormalizationScore <- c("Normalization Score", NormalizationScore)

# ---- First output file: SD table followed by the percent table ----
# The banner row carries 16 filler cells after the section title.
AppendCsv(
  matrix(c("SD Table", "please", "just", "ignore", "these", "words", "up",
           "here", "for", "now", "yo", "yo", "yo", "yo", "yo", "yo"),
         nrow = 1),
  FILENAME
)
AppendCsv(SDtable, FILENAME)
AppendCsv(c("Percent Table"), FILENAME)
AppendCsv(PercentTable, FILENAME)

# Endogenous probability matrix for export: a leading row of position offsets
# (-7..7), row names cleared, and the residue labels as the first column.
HeaderSD <- c(-7:7)
EPMtableu <- rbind(HeaderSD, EPMtable)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# Two-row header for the selectivity section: a row of single spaces, then the
# "Position" label followed by the offsets -7..7.
BlankRow <- matrix(data = rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(
  BlankRow,
  matrix(data = c("Position", as.character(-7:7)), nrow = 1)
)

# ---- Second output file: selectivity, EPM, normalization score ----
AppendCsv(
  matrix(c("Selectivity", "just", "ignore", "these", "words", "up", "here",
           "for", "now", "yo", "yo", "yo", "yo", "yo", "yo"),
         nrow = 1),
  FILENAME2
)
AppendCsv(SelectivityHeader, FILENAME2)
# The selectivity sheet is the only section written with its row names.
AppendCsv(SelectivitySheet, FILENAME2, row.names = TRUE)
AppendCsv(c("Endogenous Probability Matrix"), FILENAME2)
AppendCsv(EPMtableu, FILENAME2)
AppendCsv(NormalizationScore, FILENAME2)

# Restore the warning level that was saved at the top of the script.
options(warn = oldw)