|
0
|
# ---- Input tables ------------------------------------------------------------
# Three CSV inputs are loaded with strings kept as character vectors.
# The first two use their first line as a header; the third is read without a
# header, so its first line arrives as an ordinary data row.
ImportedSubstrateList <- read.csv(file = "input1", stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv(file = "input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(
  file = "input3",
  header = FALSE,
  stringsAsFactors = FALSE
)

# ---- File-name constants -----------------------------------------------------
# Plain strings; none of them is read again in the statistics code that
# follows directly below.
# NOTE(review): presumably consumed by the screener/output steps further down
# the script -- confirm.
ScreenerFilename <- "screener"
FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"
|
|
|
12
|
|
|
13
|
|
|
14
|
|
|
# Reshape the background table: transpose it so that every row of the input
# file becomes a column, then discard the first transposed row (i.e. the first
# column of the file).
# NOTE(review): that first column presumably holds the row labels -- confirm.
SubstrateBackgroundFrequency <- t(SubstrateBackgroundFrequency)

# Negative indexing with drop = FALSE always returns a matrix.  The previous
# `x[2:nrow(x), ]` collapsed to a plain vector when only one row was left and
# indexed rows 2 and 1 (in that order) when the table had a single row.
SubstrateBackgroundFrequency <-
  SubstrateBackgroundFrequency[-1, , drop = FALSE]

# Remove every remaining row that contains at least one NA.
Sub <- na.omit(SubstrateBackgroundFrequency)
SubstrateBackgroundFrequency <- Sub
|
|
0
|
20
|
|
|
21
|
|
|
22
|
|
|
23
|
|
|
24
|
|
|
25
|
|
|
26
|
|
|
27
|
|
|
28
|
|
|
29
|
|
|
# ---- Output table names ------------------------------------------------------
OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"

# Placeholder only: SDtable is reassigned to a numeric matrix where the SD
# table is built further down, so this string does not survive to that point.
SDtable <- "SDtableforthisKinase"
|
|
|
34
|
|
|
35
|
|
|
36
|
|
|
# Build `substrates`: a character matrix with one row per imported substrate
# (import row 1 is skipped) and one column per motif position, taken from
# import columns 4 to 18 (15 positions).
# NOTE(review): read.csv() has already consumed the header line, so import
# row 1 is the first data row -- confirm that skipping it is intended.
# NOTE(review): read.csv() turns a column that contains only "T"/"F" into a
# logical column, which would show up here as "TRUE"/"FALSE" -- confirm the
# inputs cannot trigger this.
MotifColumns <- 4:18
MotifRows <- ImportedSubstrateList[-1, MotifColumns, drop = FALSE]

# unlist() walks the data frame column by column, which matches matrix()'s
# column-major fill.  Unlike the previous `for (i in 2:nrow(...))` loop, this
# also works for an import with fewer than two rows: the result is then a
# 0 x 15 matrix instead of an error.
substrates <- matrix(
  as.character(unlist(MotifRows, use.names = FALSE)),
  ncol = length(MotifColumns)
)

# The central position (8 of 15) is forced to "Y" in every motif.
if (nrow(substrates) > 0) {
  substrates[, 8] <- "Y"
}
|
|
|
49
|
|
|
50 # SpacesToOs<-c(""="O",)
|
|
|
51 # substrates<-SpacesToOs[substrates]
|
|
|
52
|
|
|
# Inspection only: shows the second column of the last background row when the
# script is run with auto-printing; it has no effect on any variable.
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# Background statistics.  Columns 2 to 21 of the background matrix are read,
# in this order, as the residues A C D E F G H I K L M N P Q R S T V W Y.
# The cells are converted with as.numeric() and NAs are ignored.
BackgroundAminoAcids <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                          "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
BackgroundColumns <- seq_along(BackgroundAminoAcids) + 1L

# Whole-column indexing replaces the previous `1:nrow(x)` row index, which
# turns into c(1, 0) for an empty matrix.
AllMeans <- vapply(
  BackgroundColumns,
  function(Column) {
    mean(as.numeric(SubstrateBackgroundFrequency[, Column]), na.rm = TRUE)
  },
  numeric(1)
)

# this is subbackfreq SDs
AllSDs <- vapply(
  BackgroundColumns,
  function(Column) {
    sd(as.numeric(SubstrateBackgroundFrequency[, Column]), na.rm = TRUE)
  },
  numeric(1)
)

# The per-residue scalars (Amean, Asd, ..., Ymean, Ysd) are still defined
# because the SD-table code further down reads them by name.
for (ResidueIndex in seq_along(BackgroundAminoAcids)) {
  assign(paste0(BackgroundAminoAcids[ResidueIndex], "mean"),
         AllMeans[ResidueIndex])
  assign(paste0(BackgroundAminoAcids[ResidueIndex], "sd"),
         AllSDs[ResidueIndex])
}

# 20 x 2 matrix: one row per residue, columns "AllMeans" and "AllSDs".
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
|
|
|
104
|
|
|
# create the percent table
#
# PercentTable[r, p] is the percentage of non-blank entries in column p of
# `substrates` that equal residue r.  Rows follow the order
#   A C D E F G H I K L M N P Q R S T V W Y
# and columns are the 15 motif positions.  A position whose entries are all
# blank ("") gives 0/0, i.e. NaN, for every residue.
AminoAcidCodes <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                    "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
PositionIndex <- seq_len(15)

# Blank cells are excluded from the denominator.  %in% never returns NA, so an
# NA cell counts as neither blank nor any residue.
BlankCounts <- vapply(
  PositionIndex,
  function(Position) sum(substrates[, Position] %in% ""),
  integer(1)
)
FilledCounts <- nrow(substrates) - BlankCounts

# 20 x 15 matrix of residue counts: match() maps each cell to its residue
# index (NA for anything else) and tabulate() ignores the NAs.
ResidueCounts <- vapply(
  PositionIndex,
  function(Position) {
    tabulate(match(substrates[, Position], AminoAcidCodes),
             nbins = length(AminoAcidCodes))
  },
  integer(length(AminoAcidCodes))
)
FractionTable <- sweep(ResidueCounts, 2, FilledCounts, "/")

# Legacy names are kept because code outside this section may refer to them:
# Column1..Column15, spaces1..spaces15, the scalars A1..Y15 and the 1 x 15 row
# matrices AllAs, CllCs, ..., YllYs.
for (Position in PositionIndex) {
  assign(paste0("Column", Position), substrates[, Position])
  assign(paste0("spaces", Position), BlankCounts[Position])
}
for (ResidueIndex in seq_along(AminoAcidCodes)) {
  ResidueCode <- AminoAcidCodes[ResidueIndex]
  CellNames <- paste0(ResidueCode, PositionIndex)
  for (Position in PositionIndex) {
    assign(CellNames[Position], FractionTable[ResidueIndex, Position])
  }
  assign(
    paste0(ResidueCode, "ll", ResidueCode, "s"),
    matrix(FractionTable[ResidueIndex, ], nrow = 1,
           dimnames = list(NULL, CellNames))
  )
}

# this is substrate percents
# Column names A1..A15 and absent row names reproduce what stacking the
# per-residue row matrices with rbind() produced; the column names later
# become the column names of the PercentTable data frame.
PercentTable <- FractionTable * 100
colnames(PercentTable) <- paste0("A", PositionIndex)
|
|
|
501
|
|
|
# create the SD table
# For every row, a percentage minus that residue's background mean, divided by
# that residue's background SD.  AllMeans and AllSDs hold the per-residue
# values in the same row order as PercentTable; as length-20 vectors they are
# recycled down each of the 15 columns, so row r always meets element r.
SDtable <- (PercentTable - AllMeans) / AllSDs

# The result is a bare numeric matrix: the column names inherited from
# PercentTable are removed.
dimnames(SDtable) <- NULL
|
|
|
527
|
|
|
528
|
|
|
# Row labels for the output tables: a "Letter" header cell followed by the 20
# residues in the row order of SDtable.
SetOfAAs <- c("Letter", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
              "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

# SetOfAAs[1] is the "Letter" header, so the residue for SDtable row j is
# SetOfAAs[j + 1].  The previous loop looked up SetOfAAs[j] and therefore
# counted the residue one row before the one whose z-score it had tested
# (and "Letter", which never occurs, for row 1).
ResidueLetters <- SetOfAAs[-1]

# For each of the 15 motif positions: number of substrates whose residue at
# that position has a z-score above 2 in SDtable.  which() skips NA/NaN
# z-scores, which previously aborted the script inside `if (... > 2)`.
SumOfSigmaAAs <- vapply(
  seq_len(15),
  function(Position) {
    EnrichedRows <- which(SDtable[seq_along(ResidueLetters), Position] > 2)
    as.numeric(sum(substrates[, Position] %in% ResidueLetters[EnrichedRows]))
  },
  numeric(1)
)
|
|
|
545
|
|
|
546 # AAs1<-length(substrates[,1])-sum(substrates[,1]=="")
|
|
|
547 # AAs2<-length(substrates[,2])-sum(substrates[,2]=="")
|
|
|
548 # AAs3<-length(substrates[,3])-sum(substrates[,3]=="")
|
|
|
549 # AAs4<-length(substrates[,4])-sum(substrates[,4]=="")
|
|
|
550 # AAs5<-length(substrates[,5])-sum(substrates[,5]=="")
|
|
|
551 # AAs6<-length(substrates[,6])-sum(substrates[,6]=="")
|
|
|
552 # AAs7<-length(substrates[,7])-sum(substrates[,7]=="")
|
|
|
553 # AAs8<-length(substrates[,8])-sum(substrates[,8]=="")
|
|
|
554 # AAs9<-length(substrates[,9])-sum(substrates[,9]=="")
|
|
|
555 #
|
|
|
556 #
|
|
|
557 #
|
|
|
558 # #AAsAtPositions<-c(AAs1,AAs2,AAs3,AAs4,AAs5,AAs6,AAs7,AAs8,AAs9)
|
|
|
559 # AAsAtPositions<-c(length(substrates[,1]),length(substrates[,2]),length(substrates[,3]),length(substrates[,4]),
|
|
|
560 # length(substrates[,5]),length(substrates[,6]),length(substrates[,7]),length(substrates[,8]),
|
|
|
561 # length(substrates[,9]))
|
|
|
562
|
|
|
# SumOfExpectedSigmaAAs[i]: for motif column i, add up AllMeans[j] over every
# residue row j whose SDtable score exceeds 2, then scale by the number of
# non-blank substrate cells in that column divided by 100.
# AllMeans is defined outside this block (presumably the background
# percentage of each residue -- confirm against its definition).
SumOfExpectedSigmaAAs <- c(1:15)
for (i in 1:15) {
  ExpectedValue <- 0
  for (j in 1:20) {
    # Rows at or below the threshold contribute nothing.
    value <- if (SDtable[j, i] > 2) AllMeans[j] else 0
    ExpectedValue <- ExpectedValue + value
  }
  # Cells equal to "" are treated as "no residue at this position".
  nonblank_count <- length(substrates[, i]) - sum(substrates[, i] %in% "")
  SumOfExpectedSigmaAAs[i] <- ExpectedValue * nonblank_count / 100
}
|
|
|
575
|
|
|
# Site selectivity: observed / expected count of enriched residues per column.
# The three rbind() arguments are passed as bare symbols on purpose: their
# names become the row names of SelectivitySheet.
SelectivityRow <- SumOfSigmaAAs / SumOfExpectedSigmaAAs
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)

# Turn the label vector into a one-column matrix so it can be used as the
# first column of the output tables.
SetOfAAs <- matrix(data = SetOfAAs, ncol = 1)

# Keep the bare score table as SDtableu, then prepend the -7..7 position
# header row and the label column to SDtable.
SDtableu <- SDtable
HeaderSD <- -7:7
SDtable <- rbind(HeaderSD, SDtableu)
row.names(SDtable) <- NULL
SDtable <- data.frame(SetOfAAs, SDtable)

# Same decoration for the percentage table.
PercentTable <- rbind(HeaderSD, PercentTable)
row.names(PercentTable) <- NULL
PercentTable <- data.frame(SetOfAAs, PercentTable)

# Normalization score = total of Number.of.pY / total of Number.of.Y, each
# with NA entries removed.
# NOTE(review): at the top of the file SubstrateBackgroundFrequency is replaced
# by its t() transpose; if it is still that matrix here, `$` will fail.
# Presumably it is a data frame again by this point -- confirm.
numberofY <- as.numeric(SubstrateBackgroundFrequency$Number.of.Y)
numberofY <- numberofY[!is.na(numberofY)]

numberofPY <- as.numeric(SubstrateBackgroundFrequency$Number.of.pY)
numberofPY <- numberofPY[!is.na(numberofPY)]

NormalizationScore <- sum(numberofPY) / sum(numberofY)
|
|
|
597
|
|
|
598 # positions<-matrix(data = NA, nrow=20,ncol = 15)
|
|
|
599 #
|
|
|
600 # #column1
|
|
|
601 #
|
|
|
602 # for (q in 1:15) {
|
|
|
603 # sA<-sum(substrates[,i]=="A")
|
|
|
604 # positions[1,i]<-sA
|
|
|
605 # sC<-sum(substrates[,i]=="C")
|
|
|
606 # positions[2,i]<-sC
|
|
|
607 # sD<-sum(substrates[,i]=="D")
|
|
|
608 # positions[3,i]<-sD
|
|
|
609 # sE<-sum(substrates[,i]=="E")
|
|
|
610 # positions[4,i]<-sE
|
|
|
611 # sF<-sum(substrates[,i]=="F")
|
|
|
612 # sG<-sum(substrates[,i]=="G")
|
|
|
613 # sH<-sum(substrates[,i]=="H")
|
|
|
614 # sI<-sum(substrates[,i]=="I")
|
|
|
615 # sK<-sum(substrates[,i]=="K")
|
|
|
616 # sL<-sum(substrates[,i]=="L")
|
|
|
617 # sM<-sum(substrates[,i]=="M")
|
|
|
618 # sN<-sum(substrates[,i]=="N")
|
|
|
619 # sP<-sum(substrates[,i]=="P")
|
|
|
620 # sQ<-sum(substrates[,i]=="Q")
|
|
|
621 # sR<-sum(substrates[,i]=="R")
|
|
|
622 # sS<-sum(substrates[,i]=="S")
|
|
|
623 # sT<-sum(substrates[,i]=="T")
|
|
|
624 # sV<-sum(substrates[,i]=="V")
|
|
|
625 # sW<-sum(substrates[,i]=="W")
|
|
|
626 # sY<-sum(substrates[,i]=="Y")
|
|
|
627 # positions[5,i]<-sF
|
|
|
628 # positions[6,i]<-sG
|
|
|
629 # positions[7,i]<-sH
|
|
|
630 # positions[8,i]<-sI
|
|
|
631 # positions[9,i]<-sK
|
|
|
632 # positions[10,i]<-sL
|
|
|
633 # positions[11,i]<-sM
|
|
|
634 # positions[12,i]<-sN
|
|
|
635 # positions[13,i]<-sP
|
|
|
636 # positions[14,i]<-sQ
|
|
|
637 # positions[15,i]<-sR
|
|
|
638 # positions[16,i]<-sS
|
|
|
639 # positions[17,i]<-sT
|
|
|
640 # positions[18,i]<-sV
|
|
|
641 # positions[19,i]<-sW
|
|
|
642 # positions[20,i]<-sY
|
|
|
643 # }
|
|
|
644
|
|
|
# Here I create the positions table, which is needed for the endogenous
# probability matrix and is simply "how many times did an AA show up in this
# spot?": PositionTable[r, p] is the number of substrates whose residue at
# motif column p equals the r-th amino acid (A, C, D, ..., Y).
#
# This replaces ~370 hand-copied statements (one per residue/position pair)
# with a single loop. Every object the long form created is still created
# with the same name and value, because code outside this block may read them:
#   Column1..Column15  the 15 columns of `substrates`
#   spaces1..spaces15  number of "" cells in each column
#   A1..Y15            the individual counts
#   AllAs, CllCs, ..., YllYs   one 1 x 15 row of counts per residue
if (6 == 6) {
  pt_residues <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                   "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")

  # Integer counts; column names match what rbind(AllAs, ...) used to produce.
  PositionTable <- matrix(0L, nrow = length(pt_residues), ncol = 15)
  colnames(PositionTable) <- paste0("A", 1:15)

  for (pt_pos in 1:15) {
    pt_column <- substrates[, pt_pos]
    assign(paste0("Column", pt_pos), pt_column)
    assign(paste0("spaces", pt_pos), sum(pt_column %in% ""))

    for (pt_row in seq_along(pt_residues)) {
      # %in% never returns NA, so missing cells simply do not count.
      pt_count <- sum(pt_column %in% pt_residues[pt_row])
      assign(paste0(pt_residues[pt_row], pt_pos), pt_count)
      PositionTable[pt_row, pt_pos] <- pt_count
    }
  }

  # Per-residue rows under their historical names (AllAs, CllCs, DllDs, ...).
  for (pt_row in seq_along(pt_residues)) {
    pt_aa <- pt_residues[pt_row]
    pt_counts <- PositionTable[pt_row, , drop = FALSE]
    colnames(pt_counts) <- paste0(pt_aa, 1:15)
    assign(paste0(pt_aa, "ll", pt_aa, "s"), pt_counts)
  }
}
#endogenous prob matrix is AA position over subbackfreqmean
dim(PositionTable)
# EPMtable starts as a copy of the counts and is overwritten cell by cell below.
EPMtable <- PositionTable
|
|
|
1023 # EPMtable[1,]<-(PositionTable[1,]/(PositionTable[1,]*.01*Amean))
|
|
|
1024 # EPMtable[2,]<-(PositionTable[2,]/(PositionTable[2,]*.01*Cmean))
|
|
|
1025 # EPMtable[3,]<-(PositionTable[3,]/(PositionTable[3,]*.01*Dmean))
|
|
|
1026 # EPMtable[4,]<-(PositionTable[4,]/(PositionTable[4,]*.01*Emean))
|
|
|
1027 # EPMtable[5,]<-(PositionTable[5,]/(PositionTable[5,]*.01*Fmean))
|
|
|
1028 # EPMtable[6,]<-(PositionTable[6,]/(PositionTable[6,]*.01*Gmean))
|
|
|
1029 # EPMtable[7,]<-(PositionTable[7,]/(PositionTable[7,]*.01*Hmean))
|
|
|
1030 # EPMtable[8,]<-(PositionTable[8,]/(PositionTable[8,]*.01*Imean))
|
|
|
1031 # EPMtable[9,]<-(PositionTable[9,]/(PositionTable[9,]*.01*Kmean))
|
|
|
1032 # EPMtable[10,]<-(PositionTable[10,]/(PositionTable[10,]*.01*Lmean))
|
|
|
1033 # EPMtable[11,]<-(PositionTable[11,]/(PositionTable[11,]*.01*Mmean))
|
|
|
1034 # EPMtable[12,]<-(PositionTable[12,]/(PositionTable[12,]*.01*Nmean))
|
|
|
1035 # EPMtable[13,]<-(PositionTable[13,]/(PositionTable[13,]*.01*Pmean))
|
|
|
1036 # EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean))
|
|
|
1037 # EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean))
|
|
|
1038 # EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean))
|
|
|
1039 # EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean))
|
|
|
1040 # EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean))
|
|
|
1041 # EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean))
|
|
|
1042 # EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean))
|
|
|
1043
|
|
|
# columns[z]: number of non-blank ("" excluded) substrate cells in column z.
columns <- vapply(
  list(Column1, Column2, Column3, Column4, Column5,
       Column6, Column7, Column8, Column9, Column10,
       Column11, Column12, Column13, Column14, Column15),
  function(column_values) length(column_values) - sum(column_values == ""),
  integer(1)
)

# Fill the endogenous probability matrix: observed count divided by
# columns[z] * .01 * AllMeans[y]. A residue that never occurs at a position
# uses 1 / columns[z] in place of the zero count, so no cell is exactly zero.
for (z in 1:15) {
  for (y in 1:20) {
    observed <- PositionTable[y, z]
    expected <- columns[z] * .01 * AllMeans[y]
    if (observed > 0) {
      EPMtable[y, z] <- observed / expected
    } else if (observed == 0) {
      EPMtable[y, z] <- (1 / columns[z]) / expected
    }
  }
}
|
|
|
1070 #here I created the endogenous probability matrix
|
|
|
1071 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs
|
|
|
1072
|
|
|
1073
|
|
|
1074
|
|
|
1075
|
|
|
1076
|
|
|
1077 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
|
1078 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
|
1079 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
|
1080 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
|
1081 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
|
1082
|
|
|
# Append one object to a CSV file without a column header line. Every write in
# this section uses the same separator and append mode; only the target file
# and whether row names are written differ.
append_csv <- function(x, file, row.names = FALSE) {
  write.table(x, file = file, append = TRUE, sep = ",",
              row.names = row.names, col.names = FALSE)
}

# Label the score so it is written as a two-cell column.
NormalizationScore <- c("Normalization Score", NormalizationScore)

# ---- FILENAME: SD table followed by percent table ----
append_csv(c("SD Table"), FILENAME)
append_csv(SDtable, FILENAME)
append_csv(c("Percent Table"), FILENAME)
append_csv(PercentTable, FILENAME)

# Decorate the endogenous probability matrix with the -7..7 header row and the
# residue label column, as was done for SDtable and PercentTable.
EPMtableu <- EPMtable
HeaderSD <- -7:7
EPMtableu <- rbind(HeaderSD, EPMtableu)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# ---- FILENAME2: selectivity sheet, probability matrix, normalization ----
append_csv("Site Selectivity Matrix", FILENAME2)

# Two-row header: a row of single spaces, then "Position", -7, ..., 7.
# NOTE: the variable `head` shadows the name of base::head(); calls to head()
# still resolve to the function because this object is not a function.
SelectivityHeader <- matrix(data = c("Position", -7:7), nrow = 1)
head <- matrix(data = rep(" ", times = 16), nrow = 1)
SelectivityHeader <- rbind(head, SelectivityHeader)

append_csv(SelectivityHeader, FILENAME2)
#colnames(SelectivitySheet)<-c("-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7")
# Row names are written here: they carry the three row labels of the sheet.
append_csv(SelectivitySheet, FILENAME2, row.names = TRUE)
append_csv(c("Endogenous Probability Matrix"), FILENAME2)
append_csv(EPMtableu, FILENAME2)
append_csv(NormalizationScore, FILENAME2)
|
|
|
1107
|
|
|
1108
|
|
|
1109
|
|
|
1110
|
|
|
1111
|
|
|
1112
|
|
|
1113
|
|
|
1114
|
|
|
1115
|
|
|
1116
|
|
|
1117
|
|
|
1118
|
|
|
1119
|
|
|
1120
|
|
|
1121
|
|
|
1122
|
|
|
1123
|
|
|
1124
|
|
|
1125
|
|
|
1126
|
|
|
1127
|
|
|
1128
|
|
|
1129
|
|
|
1130
|
|
|
1131
|
|
|
1132
|
|
|
1133
|
|
|
1134
|
|
|
1135
|
|
|
1136
|
|
|
1137
|
|
|
1138
|
|
|
1139
|
|
|
1140
|
|
|
1141 #test myself: this script should take in amino acids for each of the 9 positions and give out every single combination of those AAs
|
|
|
1142
|
|
|
1143 #need to do following: fix it so that the accession numbers stay with the substrates,
|
|
|
1144 #also the neg false constant is totaly unphos'd Ys found by FASTA-2-CSV system# uniprot
|
|
|
1145
|
|
|
1146 #HOW MANY: IF THERE'S two aas in each position you get 2^9, so I assume the numbers are:
|
|
|
1147 #(number in position-4)*(number in position -3)*(number in position -2)...=total
|
|
|
1148 # require(rJava)
|
|
|
1149 # require(xlsxjars)
|
|
|
1150 # require(xlsx)
|
|
|
1151 # # require(readxl)
|
|
|
1152
|
|
|
1153 #View(SDtable)
|
|
|
# Scores only: drop the header row (row 1) and the label column (column 1)
# that were attached to SDtable for output.
bareSDs <- SDtable[2:21, 2:16]
goodones <- bareSDs > 2

# Row index (as a character name) -> one-letter amino-acid code.
aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N",
               "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y")

# Residues to use at one motif position.
#   position_col: column of bareSDs / goodones (1 = position -7, 15 = +7).
# Returns the one-letter codes of every residue scoring above 2; when none
# does, the residue(s) with the highest score instead.
select_position_residues <- function(position_col) {
  # `%in% TRUE` treats NA scores as "not above 2".
  chosen <- which(goodones[, position_col] %in% TRUE)
  if (length(chosen) < 1) {
    scores <- bareSDs[, position_col]
    # The fallback used max() without na.rm, so a single NA score made the
    # maximum NA and left the position with no residue at all. Ignore NAs
    # here, as the primary selection already does. If every score is NA the
    # result stays empty, exactly as before.
    if (!all(is.na(scores))) {
      chosen <- which(scores == max(scores, na.rm = TRUE))
    }
  }
  # sapply() is kept (rather than aa_props2[chosen]) so the names and the
  # empty-input result are the same as in the original per-position code.
  sapply(chosen, function(x) aa_props2[x])
}

# m = minus, d0 = the central position, p = plus.
Positionm7 <- select_position_residues(1)
Positionm6 <- select_position_residues(2)
Positionm5 <- select_position_residues(3)
Positionm4 <- select_position_residues(4)
Positionm3 <- select_position_residues(5)
Positionm2 <- select_position_residues(6)
Positionm1 <- select_position_residues(7)

Positiond0 <- select_position_residues(8)

Positionp1 <- select_position_residues(9)
Positionp2 <- select_position_residues(10)
Positionp3 <- select_position_residues(11)
Positionp4 <- select_position_residues(12)
Positionp5 <- select_position_residues(13)
Positionp6 <- select_position_residues(14)
Positionp7 <- select_position_residues(15)
|
|
|
1208
|
|
|
1209
|
|
|
1210 # Positionm7<-c("D","H","N","V")
|
|
|
1211 # Positionm6<-c("E","V")
|
|
|
1212 # Positionm5<-c("D","H")
|
|
|
1213 # Positionm4<-c("D","N")
|
|
|
1214 # Positionm3<-c("D","E","F","Q")
|
|
|
1215 # Positionm2<-c("D","N","Q","S")
|
|
|
1216 # Positionm1<-c("F","I","L")
|
|
|
1217 # Positiond0<-c("Y")
|
|
|
1218 # Positionp1<-c("A","E")
|
|
|
1219 # Positionp2<-c("T","S","Q","E")
|
|
|
1220 # Positionp3<-c("V")
|
|
|
1221 # Positionp4<-c("K")
|
|
|
1222 # Positionp5<-c("K")
|
|
|
1223 # Positionp6<-c("K")
|
|
|
1224 # Positionp7<-c("R")
|
|
|
1225 #this is where the amino acids for each position are given. m means minus, p mean plus
|
|
|
1226 ########################################
|
|
|
1227 # ScreenerFilename<-"C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls"
|
|
|
1228
|
|
|
1229
|
|
|
1230
|
|
|
1231
|
|
|
1232
|
|
|
1233
|
|
|
1234
|
|
|
1235
|
|
|
1236
|
|
|
1237
|
|
|
1238
|
|
|
# Load the screener sheet without treating the first line as a header.
screaner <- read.csv(ScreenerFilename, header = FALSE, stringsAsFactors = FALSE)

# The k-th kinase table occupies 24 consecutive rows; tables start every 25
# rows beginning at row 2 (rows 1, 26, 51, ... are skipped -- presumably a
# title line per kinase, confirm against the screener file).
kinase_block <- function(k) {
  first_row <- 25 * (k - 1) + 2
  screaner[first_row:(first_row + 23), ]
}

Abl  <- kinase_block(1)
Arg  <- kinase_block(2)
Btk  <- kinase_block(3)
Csk  <- kinase_block(4)
Fyn  <- kinase_block(5)
Hck  <- kinase_block(6)
JAK2 <- kinase_block(7)
Lck  <- kinase_block(8)
Lyn  <- kinase_block(9)
Pyk2 <- kinase_block(10)
Src  <- kinase_block(11)
Syk  <- kinase_block(12)
Yes  <- kinase_block(13)

# Two questions: (1) why are we doing BTK when we already have a
# bioinformatics page about it?
# (2) I reran everything and only get 96 positions of interest in the SD table.

#Do_You_want_An_Excel_Output_Questionmark<-"NO"
# Name of the CSV file for the generated peptides.
GeneratedPeptidesFile <- "GeneratedPeptidesFile.csv"
|
|
|
1269
|
|
|
1270
|
|
|
1271 # Abl<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 4)
|
|
|
1272 # Arg<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 5)
|
|
|
1273 # Btk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 6)
|
|
|
1274 # Csk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 7)
|
|
|
1275 # Fyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 8)
|
|
|
1276 # Hck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 9)
|
|
|
1277 # JAK2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 10)
|
|
|
1278 # Lck<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 11)
|
|
|
1279 # Lyn<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 12)
|
|
|
1280 # Pyk2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 13)
|
|
|
1281 # Src<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 14)
|
|
|
1282 # Syk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 15)
|
|
|
1283 # Yes<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 16)
|
|
|
1284 #
|
|
|
1285
|
|
|
1286
|
|
|
# One global per amino-acid letter holding its numeric code (A = 1 ... Y = 20).
# These single-letter variables are kept because later code refers to them by
# bare name.
# NOTE(review): this overwrites the built-in shorthands `F` (now 5) and `T`
# (now 17); from here on only TRUE / FALSE are safe to use as logicals.
aa_code_letters <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
                     "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
for (aa_code in seq_along(aa_code_letters)) {
  assign(aa_code_letters[aa_code], as.numeric(aa_code))
}

# Letter -> code lookup; "xY" shares the code of "Y", and "O" gets code 21.
aa_props <- as.numeric(seq_along(aa_code_letters))
names(aa_props) <- aa_code_letters
aa_props <- c(aa_props, "xY" = Y, "O" = 21)

# Translate the residues chosen for one position into their numeric codes.
# sapply() is used so that the result carries the same names as before.
residue_codes <- function(residues) {
  sapply(residues, function(x) aa_props[x])
}

# Numbering is historical: number1..number9 cover positions -4..+4,
# number10..number12 cover +5..+7, and number13..number15 cover -5..-7.
number15 <- residue_codes(Positionm7)
number14 <- residue_codes(Positionm6)
number13 <- residue_codes(Positionm5)
number1  <- residue_codes(Positionm4)
number2  <- residue_codes(Positionm3)
number3  <- residue_codes(Positionm2)
number4  <- residue_codes(Positionm1)
number5  <- residue_codes(Positiond0)
number6  <- residue_codes(Positionp1)
number7  <- residue_codes(Positionp2)
number8  <- residue_codes(Positionp3)
number9  <- residue_codes(Positionp4)
number10 <- residue_codes(Positionp5)
number11 <- residue_codes(Positionp6)
number12 <- residue_codes(Positionp7)
|
|
|
1326
|
|
|
1327 # number1<-Positionm4
|
|
|
1328 # number2<-Positionm3
|
|
|
1329 # number3<-Positionm2
|
|
|
1330 # number4<-Positionm1
|
|
|
1331 # number5<-Positiond0
|
|
|
1332 # number6<-Positionp1
|
|
|
1333 # number7<-Positionp2
|
|
|
1334 # number8<-Positionp3
|
|
|
1335 # number9<-Positionp4
|
|
|
1336
|
|
|
1337 #############################
|
|
|
1338 #here I create the Abl seqs with proper value for each number
|
|
|
# Disabled legacy step: the condition is always FALSE, so nothing in this
# block ever runs. If enabled, it would replace every amino-acid letter in
# Ablnumber1 .. Ablnumber9 with the numeric code held in the variable of the
# same name, one letter at a time in the order listed below.
if (FALSE) {
  residue_codes <- list(
    A = A, C = C, D = D, E = E, F = F, G = G, H = H, I = I, K = K, L = L,
    M = M, N = N, P = P, Q = Q, R = R, S = S, T = T, V = V, W = W, Y = Y
  )

  # Apply the letter -> code substitutions sequentially to `sequences`.
  substitute_codes <- function(sequences) {
    for (letter in names(residue_codes)) {
      sequences <- gsub(letter, residue_codes[[letter]], sequences, perl = TRUE)
    }
    sequences
  }

  Ablnumber1 <- substitute_codes(Ablnumber1)
  Ablnumber2 <- substitute_codes(Ablnumber2)
  Ablnumber3 <- substitute_codes(Ablnumber3)
  Ablnumber4 <- substitute_codes(Ablnumber4)
  Ablnumber5 <- substitute_codes(Ablnumber5)
  Ablnumber6 <- substitute_codes(Ablnumber6)
  Ablnumber7 <- substitute_codes(Ablnumber7)
  Ablnumber8 <- substitute_codes(Ablnumber8)
  Ablnumber9 <- substitute_codes(Ablnumber9)
}
|
|
|
1529 ########################################
|
|
|
1530
|
|
|
1531
|
|
|
# Enumerate every peptide that can be built by choosing one residue from the
# allowed set at each of the 15 motif positions (-7 .. +7).
#
# Produces:
#   total             - expected number of peptides (product of the set sizes)
#   GeneratedPeptides - total x 15 matrix of residue symbols
#   NumeratedPeptides - total x 15 matrix of the matching numeric codes
#   count             - number of rows actually generated (equals total)
#
# This replaces fifteen nested for-loops: it yields the same rows in the same
# order, but is vectorised and returns empty 0 x 15 matrices (instead of
# failing) when any position has no allowed residue.
position_sets <- list(
  Positionm7, Positionm6, Positionm5, Positionm4, Positionm3, Positionm2,
  Positionm1, Positiond0, Positionp1, Positionp2, Positionp3, Positionp4,
  Positionp5, Positionp6, Positionp7
)
# Numeric residue codes, listed in the same position order as position_sets.
code_sets <- list(
  number15, number14, number13, number1, number2, number3, number4, number5,
  number6, number7, number8, number9, number10, number11, number12
)
n_positions <- length(position_sets)

# This is just a way to double-check that the number of generated peptides is
# correct: the number of allowed residues per position, multiplied together.
total <- prod(lengths(position_sets))

# One row per peptide, holding the index of the chosen residue at each
# position. expand.grid() varies its FIRST factor fastest, so the positions
# are passed in reverse order: position +7 then changes fastest and position
# -7 slowest, exactly like the original innermost/outermost loops.
choice_index <- expand.grid(
  rev(lapply(position_sets, seq_along)),
  KEEP.OUT.ATTRS = FALSE
)

# Column `pos` of the result: the value chosen at that position for every
# peptide. Names are dropped so the matrices carry no dimnames.
pick_column <- function(value_sets, pos) {
  unname(value_sets[[pos]][choice_index[[n_positions + 1 - pos]]])
}

GeneratedPeptides <- do.call(
  cbind,
  lapply(seq_len(n_positions), function(pos) pick_column(position_sets, pos))
)
NumeratedPeptides <- do.call(
  cbind,
  lapply(seq_len(n_positions), function(pos) pick_column(code_sets, pos))
)

count <- nrow(GeneratedPeptides)
stopifnot(count == total)
|
|
|
1592 ####################################################################
|
|
|
# Now use the Endogenous Probability Matrix from the previous script
# (EPMtableu) to score the generated peptides.
|
|
|
# Score every generated peptide against this kinase's table (EPMtableu).
#
# Produces:
#   ThisKinTable           - EPMtableu with a leading column of ones
#   ThisKinGeneratedScores - raw score per peptide (product of table entries)
#   ThisKinGenWeirdScore   - score / (score + 1 / NormalizationScore[2])
#
# The scoring is vectorised over peptides and also works when
# GeneratedPeptides has zero rows (the former `1:nrow()` loop did not).

# Prepend a column of 21 ones; this shifts every EPMtableu column one place to
# the right, which is why position p is read from column p + 2 below.
ThisKinTable <- EPMtableu
TKTcolumn <- c(data = rep(1, times = 21))
TKTcolumn <- as.matrix(TKTcolumn, ncol = 1)
ThisKinTable <- cbind(TKTcolumn, ThisKinTable)

# Table row of each residue: its numeric code plus one.
peptide_rows <- NumeratedPeptides[, 1:15, drop = FALSE] + 1

# Multiply the table entries of positions 1-7 and 9-15. Position 8 (the
# central residue) is deliberately left out of the product.
ThisKinGeneratedScores <- as.numeric(ThisKinTable[peptide_rows[, 1], 3])
for (pos in c(2:7, 9:15)) {
  ThisKinGeneratedScores <- ThisKinGeneratedScores *
    ThisKinTable[as.numeric(peptide_rows[, pos]), pos + 2]
}

# Squash the raw score into the 0..1 range using the normalization score.
normalization_offset <- 1 / as.numeric(NormalizationScore[2])
ThisKinGenWeirdScore <- ThisKinGeneratedScores /
  (ThisKinGeneratedScores + normalization_offset)
|
|
|
1615
|
|
|
# Score every generated peptide against each of the 13 screener kinase tables.
#
# The identical 14-term product used to be written out once per kinase inside
# a `1:nrow()` loop; it now lives in one vectorised helper, which also copes
# with a zero-row peptide matrix.

# Raw screener score of each peptide.
#
# screener:      one kinase table from the screener workbook; the row is the
#                residue code and column p + 1 holds motif position p.
# peptide_codes: matrix with one peptide per row and 15 residue-code columns.
# Returns one score per peptide: the product of the table entries for
# positions 1-7 and 9-15 (position 8, the central residue, is skipped).
# Unlike ThisKinTable, rows are addressed by the code itself (no +1 offset).
score_against_screener <- function(screener, peptide_codes) {
  scores <- screener[peptide_codes[, 1], 2]
  for (pos in c(2:7, 9:15)) {
    scores <- scores * screener[peptide_codes[, pos], pos + 1]
  }
  scores
}

AblGeneratedScores <- score_against_screener(Abl, NumeratedPeptides)
ArgGeneratedScores <- score_against_screener(Arg, NumeratedPeptides)
BtkGeneratedScores <- score_against_screener(Btk, NumeratedPeptides)
CskGeneratedScores <- score_against_screener(Csk, NumeratedPeptides)
FynGeneratedScores <- score_against_screener(Fyn, NumeratedPeptides)
HckGeneratedScores <- score_against_screener(Hck, NumeratedPeptides)
JAK2GeneratedScores <- score_against_screener(JAK2, NumeratedPeptides)
LckGeneratedScores <- score_against_screener(Lck, NumeratedPeptides)
LynGeneratedScores <- score_against_screener(Lyn, NumeratedPeptides)
Pyk2GeneratedScores <- score_against_screener(Pyk2, NumeratedPeptides)
SrcGeneratedScores <- score_against_screener(Src, NumeratedPeptides)
SykGeneratedScores <- score_against_screener(Syk, NumeratedPeptides)
YesGeneratedScores <- score_against_screener(Yes, NumeratedPeptides)
|
|
|
1703
|
|
|
1704
|
|
|
1705
|
|
|
# Decide, for each screener kinase, which generated peptides score above that
# kinase's cutoff, then count per peptide how many kinases it exceeds.

# Normalization term of a screener table: reciprocal of the value in row 22,
# column 1.
screener_norm <- function(screener) 1 / as.numeric(screener[22, 1])

# Threshold stored in a screener table: the value in row 24, column 1.
screener_threshold <- function(screener) as.numeric(screener[24, 1])

# Raw-score cutoff for a threshold and normalization term:
# thresh * norm / (100 - thresh).
raw_score_cutoff <- function(thresh, norm) (thresh * norm) / (100 - thresh)

AblNorm <- screener_norm(Abl)
AblThresh <- screener_threshold(Abl)
AblTrueThresh <- raw_score_cutoff(AblThresh, AblNorm)
AblActive <- unlist(AblGeneratedScores) > AblTrueThresh

ArgNorm <- screener_norm(Arg)
ArgThresh <- screener_threshold(Arg)
ArgTrueThresh <- raw_score_cutoff(ArgThresh, ArgNorm)
ArgActive <- unlist(ArgGeneratedScores) > ArgTrueThresh

BtkNorm <- screener_norm(Btk)
BtkThresh <- screener_threshold(Btk)
BtkTrueThresh <- raw_score_cutoff(BtkThresh, BtkNorm)
BtkActive <- unlist(BtkGeneratedScores) > BtkTrueThresh

CskNorm <- screener_norm(Csk)
CskThresh <- screener_threshold(Csk)
CskTrueThresh <- raw_score_cutoff(CskThresh, CskNorm)
CskActive <- CskGeneratedScores > CskTrueThresh

FynNorm <- screener_norm(Fyn)
FynThresh <- screener_threshold(Fyn)
FynTrueThresh <- raw_score_cutoff(FynThresh, FynNorm)
FynActive <- unlist(FynGeneratedScores) > FynTrueThresh

HckNorm <- screener_norm(Hck)
HckThresh <- screener_threshold(Hck)
HckTrueThresh <- raw_score_cutoff(HckThresh, HckNorm)
HckActive <- unlist(HckGeneratedScores) > HckTrueThresh

# The result name is spelled "JAk2Active" (lower-case k); it is kept because
# the same spelling is used wherever this vector is read.
JAK2Norm <- screener_norm(JAK2)
JAK2Thresh <- screener_threshold(JAK2)
JAK2TrueThresh <- raw_score_cutoff(JAK2Thresh, JAK2Norm)
JAk2Active <- unlist(JAK2GeneratedScores) > JAK2TrueThresh

LckNorm <- screener_norm(Lck)
LckThresh <- screener_threshold(Lck)
LckTrueThresh <- raw_score_cutoff(LckThresh, LckNorm)
LckActive <- unlist(LckGeneratedScores) > LckTrueThresh

LynNorm <- screener_norm(Lyn)
LynThresh <- screener_threshold(Lyn)
LynTrueThresh <- raw_score_cutoff(LynThresh, LynNorm)
LynActive <- unlist(LynGeneratedScores) > LynTrueThresh

Pyk2Norm <- screener_norm(Pyk2)
Pyk2Thresh <- screener_threshold(Pyk2)
Pyk2TrueThresh <- raw_score_cutoff(Pyk2Thresh, Pyk2Norm)
Pyk2Active <- unlist(Pyk2GeneratedScores) > Pyk2TrueThresh

SrcNorm <- screener_norm(Src)
SrcThresh <- screener_threshold(Src)
SrcTrueThresh <- raw_score_cutoff(SrcThresh, SrcNorm)
SrcActive <- unlist(SrcGeneratedScores) > SrcTrueThresh

SykNorm <- screener_norm(Syk)
SykThresh <- screener_threshold(Syk)
SykTrueThresh <- raw_score_cutoff(SykThresh, SykNorm)
SykActive <- unlist(SykGeneratedScores) > SykTrueThresh

YesNorm <- screener_norm(Yes)
YesThresh <- screener_threshold(Yes)
YesTrueThresh <- raw_score_cutoff(YesThresh, YesNorm)
YesActive <- unlist(YesGeneratedScores) > YesTrueThresh

# Number of screener kinases whose cutoff each peptide exceeds.
AllActive <- AblActive + ArgActive + BtkActive + CskActive + FynActive +
  HckActive + JAk2Active + LckActive + LynActive + Pyk2Active + SrcActive +
  SykActive + YesActive
|
|
|
1772 #Btkactive+
|
|
|
1773
|
|
|
# Assemble the result table: one row per generated peptide holding its full
# motif string, the 15 individual residues, its scores for this kinase, and
# the per-screener-kinase activity flags; then sort it so that peptides
# exceeding the fewest screener cutoffs come first.
Scores <- ThisKinGeneratedScores
ThresholdValues <- ThisKinGenWeirdScore

# Collapse each 15-residue row into a single motif string. seq_len() keeps
# this safe when there are no generated peptides (the former `1:nrow()` loop
# failed with a subscript error in that case).
FullMotifs <- vapply(
  seq_len(nrow(GeneratedPeptides)),
  function(row) paste(GeneratedPeptides[row, 1:15], collapse = ""),
  character(1)
)

PeptidesWithRanks <- cbind.data.frame(
  FullMotifs, GeneratedPeptides, Scores, ThresholdValues
)
PeptidesWithRanks <- cbind.data.frame(
  PeptidesWithRanks, AllActive, AblActive, ArgActive, BtkActive, CskActive,
  FynActive, HckActive, JAk2Active, LckActive, LynActive, Pyk2Active,
  SrcActive, SykActive, YesActive
)

# Ascending AllActive: the most selective candidates are listed first.
RanksPeptides <- PeptidesWithRanks[
  order(PeptidesWithRanks$AllActive, decreasing = FALSE),
]
|
|
|
1787 # PepRankHead<-c(1:9,"Sequence","RPMS","PMS")
|
|
|
1788 # RanksPeptides<-rbind.data.frame(PepRankHead,PeptidesWithRanks)
|
|
|
1789 #head(RanksPeptides)
|
|
|
1790
|
|
|
1791
|
|
|
1792 #now I have to score the negative sequences... for some reason
|
|
|
1793 #write up how we transfect with lipofectamine
|
|
|
1794 #3,4,5 questions
|
|
|
1795
|
|
|
1796 #PAUSED EHRE AT 4:50, HOPING THAT FIXING MINERVOTHING SO THAT LEFT SPACES WORKS FIXES A THING. OTHERWISE
|
|
|
1797 #I FUCKED WITH THE MCC TABLE AND NEED TO FINISH IT
|
|
|
1798
|
|
|
# Score the known negative and positive substrates against ThisKinTable.
#
# Produces: NegativeScores / NegativeWeirdScores / NegativeWithScores and
# PositiveScores / PositiveWeirdScores / positivewithscores. The "weird"
# score is score / (score + 1 / NormalizationScore[2]) expressed in percent.

# Append a row of ones to ThisKinTable.
# NOTE(review): presumably this is the row read for blank positions ("O" has
# code 21, and the +1 offset gives row 22), contributing a neutral factor of
# 1 -- confirm ThisKinTable has 21 rows at this point.
# SetOfAAs is converted to character first, presumably so that the appended
# value is not rejected as an unknown factor level.
ThisKinBlanks <- rep(1, times = 17)
ThisKinTable$SetOfAAs <- as.character(ThisKinTable$SetOfAAs)
ThisKinTable <- rbind.data.frame(ThisKinTable, ThisKinBlanks)

# Raw score of one motif against ThisKinTable.
#
# table_rows: the 15 table rows of the motif (residue code + 1).
# Returns the product of the table entries for positions 1-7 and 9-15, where
# position p is read from column p + 2; position 8 (the central residue) is
# skipped. The result is NA when a needed position is missing or unknown.
score_against_this_kinase <- function(table_rows) {
  score <- as.numeric(ThisKinTable[table_rows[1], 3])
  for (pos in c(2:7, 9:15)) {
    score <- score * ThisKinTable[as.numeric(table_rows[pos]), pos + 2]
  }
  score
}

# Loop-invariant part of the normalized score.
normalization_offset <- 1 / as.numeric(NormalizationScore[2])

# ---- Negative substrates: motif strings in column 2 ----
NegativeScores <- rep(NA, times = nrow(NegativeSubstrateList))
NegativeWeirdScores <- rep(NA, times = nrow(NegativeSubstrateList))
for (v in seq_len(nrow(NegativeSubstrateList))) {
  # Split the motif into residues; spaces stand for blank positions ("O").
  motif <- unlist(strsplit(NegativeSubstrateList[v, 2], ""))
  motif <- gsub(" ", "O", motif)
  # Direct lookup instead of sapply(): an empty motif then scores NA rather
  # than aborting the whole loop.
  Scoringpeptide <- unname(aa_props[motif]) + 1
  ThisKinTableScore <- score_against_this_kinase(Scoringpeptide)
  NegativeScores[v] <- ThisKinTableScore
  ThisKinTableScore <- ThisKinTableScore /
    (ThisKinTableScore + normalization_offset)
  NegativeWeirdScores[v] <- ThisKinTableScore * 100
}

negativesubstrates <- NegativeSubstrateList[, 2]
NegativeWithScores <- cbind(
  negativesubstrates,
  as.character(NegativeScores),
  as.character(NegativeWeirdScores)
)

# TODO: the negative substrates still need to be written to an output file.

# ---- Positive substrates: one residue per column, columns 4 to 18 ----
PositiveScores <- rep(NA, times = nrow(ImportedSubstrateList))
PositiveWeirdScores <- rep(NA, times = nrow(ImportedSubstrateList))
for (v in seq_len(nrow(ImportedSubstrateList))) {
  # Empty cells stand for blank positions ("O").
  motif <- unlist(ImportedSubstrateList[v, 4:18])
  motif <- gsub("^$", "O", motif)
  Scoringpeptide <- unname(aa_props[motif]) + 1
  ThisKinTableScore <- score_against_this_kinase(Scoringpeptide)
  PositiveScores[v] <- ThisKinTableScore
  ThisKinTableScore <- ThisKinTableScore /
    (ThisKinTableScore + normalization_offset)
  PositiveWeirdScores[v] <- ThisKinTableScore * 100
}

positivesubstrates <- ImportedSubstrateList[, 4:18]
positivewithscores <- cbind.data.frame(
  positivesubstrates, PositiveScores, PositiveWeirdScores
)
|
|
|
1865
|
|
|
1866
|
|
|
1867 #write down the transient transfection SOP and what we will be doing with them
|
|
|
1868 #write down the vector names I will be using
|
|
|
1869 #write down something about transforming bacteria and with what
|
|
|
1870
|
|
|
1871 #90% whatevernness
|
|
|
1872 # TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91])
|
|
|
1873 # Senseninetyone<-TPninetyone/nrow(positivesubstrates)
|
|
|
1874 #
|
|
|
1875 # TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91])
|
|
|
1876 # Specninetyone<-TNninetyone/100
|
|
|
1877
|
|
|
1878 #create the MCC table
|
|
|
1879
|
|
|
# Build the threshold characterization table.
#
# For every integer cutoff from 100 down to 1 the scaled positive scores
# (PositiveWeirdScores) and scaled negative scores (NegativeWeirdScores) are
# turned into confusion-matrix counts and summary statistics. Element z of
# every vector below belongs to cutoff 101 - z, matching threshold[z].

# Matthews correlation coefficient from confusion-matrix counts:
#   (TP * TN - FP * FN) / sqrt((TP+FN) * (TN+FP) * (TP+FP) * (TN+FN))
# Counts are converted to double first so the four-way product cannot overflow
# R's 32-bit integers. When the product under the root is not positive the
# coefficient is reported as 0, the usual convention for an empty margin.
MatthewsFromCounts <- function(tp, tn, fp, fn) {
  tp <- as.numeric(tp)
  tn <- as.numeric(tn)
  fp <- as.numeric(fp)
  fn <- as.numeric(fn)
  margin_product <- (tp + fn) * (tn + fp) * (tp + fp) * (tn + fn)
  if (is.na(margin_product) || margin_product <= 0) {
    return(0)
  }
  (tp * tn - fp * fn) / sqrt(margin_product)
}

threshold <- 100:1

Truepositives <- integer(100)
Falsenegatives <- integer(100)
Sensitivity <- numeric(100)
TrueNegatives <- integer(100)
FalsePositives <- integer(100)
Specificity <- numeric(100)
Accuracy <- numeric(100)
MCC <- numeric(100)
EER <- numeric(100)

#MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS

for (z in 1:100) {
  thres <- 101 - z

  # Positives scoring at or above the cutoff are hits; the rest are misses.
  # NOTE(review): subsetting with a logical vector keeps NA comparisons, so an
  # NA score is counted as a hit here (and as a true negative below).
  Truepositives[z] <- length(PositiveWeirdScores[PositiveWeirdScores >= thres])
  Falsenegatives[z] <- nrow(positivesubstrates) - Truepositives[z]
  Sensitivity[z] <- Truepositives[z] / (Falsenegatives[z] + Truepositives[z])

  TrueNegatives[z] <- length(NegativeWeirdScores[NegativeWeirdScores < thres])
  # at thresh 100 this should be 0, because it is total minus true negatives
  FalsePositives[z] <- nrow(NegativeSubstrateList) - TrueNegatives[z]

  # NOTE(review): this is 1 - TN / (TN + FP), i.e. the false-positive rate,
  # stored under the name "Specificity". Left unchanged because EER below and
  # the output column depend on it; confirm the intended definition.
  Specificity[z] <- 1 - (TrueNegatives[z] / (FalsePositives[z] + TrueNegatives[z]))

  # Percentage of all sequences classified correctly at this cutoff.
  Accuracy[z] <- 100 * (Truepositives[z] + TrueNegatives[z]) /
    (Falsenegatives[z] + FalsePositives[z] + TrueNegatives[z] + Truepositives[z])

  MCC[z] <- MatthewsFromCounts(
    Truepositives[z], TrueNegatives[z], FalsePositives[z], Falsenegatives[z]
  )

  # NOTE(review): formula kept exactly as written; (Truepositives + Falsenegatives)
  # is a count here, so 1 - count is negative. Confirm against the intended
  # error-rate definition.
  EER[z] <- .01 * (((1 - (Sensitivity[z])) * (Truepositives[z] + Falsenegatives[z])) +
    (Specificity[z] * (1 - (Truepositives[z] + Falsenegatives[z]))))
}
Characterization <- cbind.data.frame(threshold, Truepositives, Falsenegatives, Sensitivity, TrueNegatives, FalsePositives, Specificity, Accuracy, MCC, EER)
|
|
|
1909
|
|
|
# Label row for the positive table: the 15 motif positions followed by the two
# score labels. It is bound on as the first data row, not as column names.
positiveheader <- c(as.character(1:15), "RPMS", "PMS")
positivewithscores <- rbind.data.frame(positiveheader, positivewithscores)

# The negative table gets real column names instead.
negativeheader <- c("Substrate", "RPMS", "PMS")
colnames(NegativeWithScores) <- negativeheader
|
|
|
1915
|
|
|
1916 # write.xlsx(NegativeWithScores,file = FILENAME, sheetName = "Negative Sequences Scored",col.names = TRUE,row.names = FALSE,append = TRUE)
|
|
|
1917 # write.xlsx(Characterization,file = FILENAME,sheetName = "Characterization Table",col.names = TRUE,row.names = FALSE,append = TRUE)
|
|
|
1918 # write.xlsx(RanksPeptides,file = FILENAME,sheetName = "Ranked Generated Peptides",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
|
1919 # write.xlsx(positivewithscores,file = FILENAME, sheetName = "Positive Sequences Scored",col.names = FALSE,row.names = FALSE,append = TRUE)
|
|
|
# Append a one-line title to FILENAME2, then the characterization table with
# its column names carried as the first data row (col.names is off so the
# names are written exactly once, as part of the data).
write.table(
  x = "Characterzation Table",
  file = FILENAME2,
  sep = ",",
  append = TRUE,
  row.names = FALSE,
  col.names = FALSE
)
header <- colnames(Characterization)
Characterization <- rbind.data.frame(header, Characterization)
write.table(
  Characterization,
  file = FILENAME2,
  sep = ",",
  append = TRUE,
  row.names = FALSE,
  col.names = FALSE
)

# header<-colnames(RanksPeptides)
# RanksPeptides<-rbind.data.frame(header,RanksPeptides)

# FILENAME3 is overwritten (append = FALSE) and keeps regular column names.
write.table(
  RanksPeptides,
  file = FILENAME3,
  sep = ",",
  append = FALSE,
  row.names = FALSE,
  col.names = TRUE
)
|