Mercurial > repos > jfb > st_kinatest
comparison ST Kinatest/test-data/ST_Kinatest.R @ 0:b0493d923404 draft
Uploaded
| author | jfb |
|---|---|
| date | Wed, 27 Jun 2018 11:53:32 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b0493d923404 |
|---|---|
# ---- Input tables and output names ----
# The three input tables are read from fixed file names in the working
# directory. The first and third are read with header = FALSE, so their first
# line arrives as an ordinary data row.
ImportedSubstrateList <- read.csv(
  file = "input1",
  header = FALSE,
  stringsAsFactors = FALSE
)
NegativeSubstrateList <- read.csv(file = "input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(
  file = "input3",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Name of the screener table; only assigned here.
ScreenerFilename <- "screenerST.csv"

# Names of the five output files; only assigned here.
FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"
FILENAME4 <- "output4"
FILENAME5 <- "output5"

# ---- Reshape the background-frequency table ----
# Transpose the table, discard the first transposed row, then drop every row
# that contains an NA.
# NOTE(review): the discarded row is presumably a label row -- confirm
# against the layout of "input3".
SubstrateBackgroundFrequency <- t(SubstrateBackgroundFrequency)

# `[-1, , drop = FALSE]` replaces `[2:nrow(x), ]`: with a single row `2:1`
# counts backwards and indexes out of bounds, and with exactly two rows the
# result silently collapsed to a plain vector, which breaks the later
# `nrow()` / column indexing.
SubstrateBackgroundFrequency <-
  SubstrateBackgroundFrequency[-1, , drop = FALSE]

# `Sub` is kept as a separate object, as in the original script.
Sub <- na.omit(SubstrateBackgroundFrequency)
SubstrateBackgroundFrequency <- Sub

# ---- Run configuration ----
# Trailing command-line arguments are captured, but the kinase name below is
# a fixed string; the argument-based alternative is left commented out.
args <- commandArgs(trailingOnly = TRUE)
TodaysKinase <- "AKT"
# as.character(args[1])

# Labels for the result tables.
OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
# SDtable is only a label at this point; the script reassigns the same name
# to the numeric SD table further down.
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"

# ---- Build the substrate motif matrix ----
# `substrates` gets one row per imported substrate (the first row of
# ImportedSubstrateList is skipped) and 15 columns taken from columns 4-18.
stopifnot(ncol(ImportedSubstrateList) >= 18)

n_substrates <- max(nrow(ImportedSubstrateList) - 1L, 0L)
substrates <- matrix("A", nrow = n_substrates, ncol = 15)

# seq_len() instead of `2:nrow(x)`: when only the first row is present,
# `2:1` would still iterate (over 2 and 1) and write past the end of the
# empty matrix.
for (substrate_row in seq_len(n_substrates)) {
  substratemotif <- unlist(ImportedSubstrateList[substrate_row + 1L, 4:18])

  # Column 8 of the motif keeps only the second character of its cell; a
  # cell with fewer than two characters becomes NA.
  # NOTE(review): presumably the cell holds a two-character code whose
  # second letter is the residue (the variable name suggests S or T) --
  # confirm against the input format.
  T_or_S <- unlist(strsplit(as.character(substratemotif[8]), split = ""))
  substratemotif[8] <- T_or_S[2]

  substrates[substrate_row, ] <- substratemotif
}

# ---- Background mean and SD per amino acid ----
# Columns 2-21 of SubstrateBackgroundFrequency are summarised in this fixed
# letter order (column 2 -> A, column 3 -> C, ..., column 21 -> Y).
background_letters <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)

# Fail with a clear message instead of a bare "subscript out of bounds".
stopifnot(ncol(SubstrateBackgroundFrequency) >= 21)

# The table may hold text, so every column is converted to numeric first.
background_values <- lapply(
  2:21,
  function(column) as.numeric(SubstrateBackgroundFrequency[, column])
)

AllMeans <- vapply(background_values, mean, numeric(1), na.rm = TRUE)
AllSDs <- vapply(background_values, sd, numeric(1), na.rm = TRUE)
# this is subbackfreq SDs

# Keep the per-letter scalars (Amean ... Ymean, Asd ... Ysd) defined: other
# parts of the script read them by name.
for (k in seq_along(background_letters)) {
  assign(
    paste0(background_letters[k], "mean"), AllMeans[k],
    envir = environment()
  )
  assign(
    paste0(background_letters[k], "sd"), AllSDs[k],
    envir = environment()
  )
}

# 20 x 2 matrix: one row per letter, columns "AllMeans" and "AllSDs".
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)

# ---- Percent table ----
# For each of the 15 columns of `substrates`, compute the share of each
# amino-acid letter among the non-blank cells of that column, then scale to
# percent. Rows follow this letter order:
#   A C D E F G H I K L M N P Q R S T V W Y
percent_letters <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
motif_positions <- seq_len(15)

# Number of blank ("") cells per column. `%in%` never yields NA, so NA
# cells are not counted as blanks.
blank_counts <- vapply(
  motif_positions,
  function(pos) sum(substrates[, pos] %in% ""),
  integer(1)
)

# Denominator for each column: all cells minus the blank ones. A column
# made up only of blanks gives 0 / 0 = NaN below, as it did before.
filled_counts <- vapply(
  motif_positions,
  function(pos) length(substrates[, pos]),
  integer(1)
) - blank_counts

# Fraction of `residue` at every position, as a 1 x 15 matrix whose columns
# are named <residue>1 ... <residue>15 (e.g. A1 ... A15).
position_fractions <- function(residue) {
  fractions <- vapply(
    motif_positions,
    function(pos) sum(substrates[, pos] %in% residue) / filled_counts[pos],
    numeric(1)
  )
  matrix(
    fractions,
    nrow = 1L,
    dimnames = list(NULL, paste0(residue, motif_positions))
  )
}

fraction_rows <- lapply(percent_letters, position_fractions)

# this is substrate percents
PercentTable <- do.call(rbind, fraction_rows)
PercentTable <- PercentTable * 100

# Legacy objects. The earlier hand-written version of this section defined
# Column1..15, spaces1..15, one scalar per cell (A1 ... Y15) and one row
# object per letter (AllAs, CllCs, ..., YllYs). They are still created here
# so that any code elsewhere in the script that reads them keeps working.
row_aliases <- c(
  "AllAs",
  paste0(percent_letters[-1], "ll", percent_letters[-1], "s")
)
for (pos in motif_positions) {
  assign(paste0("Column", pos), substrates[, pos], envir = environment())
  assign(paste0("spaces", pos), blank_counts[pos], envir = environment())
}
for (k in seq_along(percent_letters)) {
  assign(row_aliases[k], fraction_rows[[k]], envir = environment())
  for (pos in motif_positions) {
    assign(
      paste0(percent_letters[k], pos), fraction_rows[[k]][1, pos],
      envir = environment()
    )
  }
}

# ---- SD table ----
# Every cell of PercentTable is expressed as (percent - mean) / SD, using
# the mean and SD that belong to the cell's row (row 1 -> A, row 2 -> C,
# ..., row 20 -> Y). AllMeans and AllSDs hold those 20 values in row order;
# a length-20 vector recycles down the columns of the 20-row matrix, so
# element k is applied to row k.
SDtable <- (PercentTable - AllMeans) / AllSDs

# The result is a bare numeric matrix without row or column names.
dimnames(SDtable) <- NULL

# ---- Observed count of enriched residues per position ----
# Row labels for the SD table: a header cell followed by the 20 letters in
# the same order as the rows of SDtable.
SetOfAAs <- c(
  "Letter",
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)

# For each of the 15 positions, count the substrate cells whose letter has
# an SD-table value above 2 at that position.
#
# Two NA hazards are handled explicitly:
#  * an NA/NaN SD value (e.g. a position with no residues) is treated as
#    "not enriched" instead of aborting the script inside `if ()`;
#  * cells are matched with `%in%`, so an NA cell in `substrates` simply
#    does not match rather than turning the whole count into NA.
SumOfSigmaAAs <- vapply(
  seq_len(15),
  function(pos) {
    sd_values <- SDtable[seq_len(20), pos]
    enriched_letters <- SetOfAAs[-1][!is.na(sd_values) & sd_values > 2]
    as.numeric(sum(substrates[, pos] %in% enriched_letters))
  },
  numeric(1)
)

# ---- Number of non-blank cells per position ----
# For each of the 15 columns of `substrates`: total cells minus blank ("")
# cells. Blanks are detected with `%in%`, matching how the rest of the
# script counts them; the previous `== ""` returned NA for the whole column
# as soon as one cell was NA.
AAsAtPositions <- vapply(
  seq_len(15),
  function(pos) {
    cells <- substrates[, pos]
    length(cells) - sum(cells %in% "")
  },
  integer(1)
)

# The per-position scalars AAs1 ... AAs15 are still defined in case code
# elsewhere in the script reads them by name.
for (pos in seq_len(15)) {
  assign(paste0("AAs", pos), AAsAtPositions[pos], envir = environment())
}

| 570 # SumOfExpectedSigmaAAs[position]: the AllMeans entries of all amino acids with a z-score | |
| 571 # above 2 at that position, summed, treated as a percentage (divided by 100) of the non-blank residues there. | |
| 572 SumOfExpectedSigmaAAs<-numeric(15) | |
| 573 for (position in 1:15){ | |
| 574   meanTotal<-0 | |
| 575   for (aaRow in 1:20){ | |
| 576     if (SDtable[aaRow,position]>2){meanTotal<-meanTotal+AllMeans[aaRow]} | |
| 577   } | |
| 578   nonBlank<-length(substrates[,position])-sum(substrates[,position]%in% "") | |
| 579   SumOfExpectedSigmaAAs[position]<-meanTotal*nonBlank/100 | |
| 580 } | |
| 582 | |
| 583 # Site selectivity per position: (observed / expected enriched residues) multiplied by | |
| 584 # the share of residues at the position that are enriched (observed / non-blank total). | |
| 585 observedOverExpected<-SumOfSigmaAAs[1:15]/SumOfExpectedSigmaAAs[1:15] | |
| 586 enrichedShare<-SumOfSigmaAAs[1:15]/AAsAtPositions[1:15] | |
| 587 SelectivityRow<-observedOverExpected*enrichedShare | |
| 588 | |
| 589 # rbind() labels the three rows with the variable names; the sheet is later written with row names. | |
| 590 SelectivitySheet<-rbind(SumOfSigmaAAs,SumOfExpectedSigmaAAs,SelectivityRow) | |
| 592 | |
| 593 # Frame both tables for output: a header row of position labels (-7 ... 7) on top and | |
| 594 # the SetOfAAs labels ("Letter", then the amino-acid codes) as the first column. | |
| 595 SetOfAAs<-matrix(SetOfAAs,ncol = 1) | |
| 596 HeaderSD<- -7:7 | |
| 597 SDtableu<-SDtable | |
| 598 SDtable<-rbind(HeaderSD,SDtableu) | |
| 599 rownames(SDtable)<-NULL | |
| 600 SDtable<-data.frame(SetOfAAs,SDtable) | |
| 601 PercentTable<-rbind(HeaderSD,PercentTable) | |
| 602 rownames(PercentTable)<-NULL | |
| 603 PercentTable<-data.frame(SetOfAAs,PercentTable) | |
| 604 # NormalizationScore: sum of column 35 over sum of column 34 of the background table, | |
| 605 # using only the entries that convert to numbers. | |
| 606 numberofY<-Filter(Negate(is.na),as.numeric(SubstrateBackgroundFrequency[,34])) | |
| 607 numberofPY<-Filter(Negate(is.na),as.numeric(SubstrateBackgroundFrequency[,35])) | |
| 608 NormalizationScore<-sum(numberofPY)/sum(numberofY) | |
| 610 | |
| 611 # positions<-matrix(data = NA, nrow=20,ncol = 15) | |
| 612 # | |
| 613 # #column1 | |
| 614 # | |
| 615 # for (q in 1:15) { | |
| 616 # sA<-sum(substrates[,i]=="A") | |
| 617 # positions[1,i]<-sA | |
| 618 # sC<-sum(substrates[,i]=="C") | |
| 619 # positions[2,i]<-sC | |
| 620 # sD<-sum(substrates[,i]=="D") | |
| 621 # positions[3,i]<-sD | |
| 622 # sE<-sum(substrates[,i]=="E") | |
| 623 # positions[4,i]<-sE | |
| 624 # sF<-sum(substrates[,i]=="F") | |
| 625 # sG<-sum(substrates[,i]=="G") | |
| 626 # sH<-sum(substrates[,i]=="H") | |
| 627 # sI<-sum(substrates[,i]=="I") | |
| 628 # sK<-sum(substrates[,i]=="K") | |
| 629 # sL<-sum(substrates[,i]=="L") | |
| 630 # sM<-sum(substrates[,i]=="M") | |
| 631 # sN<-sum(substrates[,i]=="N") | |
| 632 # sP<-sum(substrates[,i]=="P") | |
| 633 # sQ<-sum(substrates[,i]=="Q") | |
| 634 # sR<-sum(substrates[,i]=="R") | |
| 635 # sS<-sum(substrates[,i]=="S") | |
| 636 # sT<-sum(substrates[,i]=="T") | |
| 637 # sV<-sum(substrates[,i]=="V") | |
| 638 # sW<-sum(substrates[,i]=="W") | |
| 639 # sY<-sum(substrates[,i]=="Y") | |
| 640 # positions[5,i]<-sF | |
| 641 # positions[6,i]<-sG | |
| 642 # positions[7,i]<-sH | |
| 643 # positions[8,i]<-sI | |
| 644 # positions[9,i]<-sK | |
| 645 # positions[10,i]<-sL | |
| 646 # positions[11,i]<-sM | |
| 647 # positions[12,i]<-sN | |
| 648 # positions[13,i]<-sP | |
| 649 # positions[14,i]<-sQ | |
| 650 # positions[15,i]<-sR | |
| 651 # positions[16,i]<-sS | |
| 652 # positions[17,i]<-sT | |
| 653 # positions[18,i]<-sV | |
| 654 # positions[19,i]<-sW | |
| 655 # positions[20,i]<-sY | |
| 656 # } | |
| 657 | |
| 658 # PositionTable: how many times each amino acid shows up at each motif position. It is | |
| 659 # a 20 x 15 integer matrix (rows in the order A,C,D,...,Y; columns = positions 1..15) | |
| 660 # and is the starting point of the endogenous probability matrix built afterwards. | |
| 661 # The per-cell copy-and-paste counting is replaced by two nested vapply() calls. | |
| 662 PositionAAOrder<-c("A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R","S","T","V","W","Y") | |
| 663 PositionTable<-vapply(1:15,function(position){ | |
| 664   vapply(PositionAAOrder,function(aa) sum(substrates[,position] %in% aa), | |
| 665     integer(1),USE.NAMES = FALSE) | |
| 666 },integer(20)) | |
| 667 # Same column names the original rbind(AllAs, ...) result carried (taken from its first row). | |
| 668 colnames(PositionTable)<-paste0("A",1:15) | |
| 669 | |
| 670 # Compatibility shim: the original code also left these globals behind, so they are | |
| 671 # still defined: Column1..Column15 (used further down), spaces1..spaces15, the | |
| 672 # per-cell counts A1..Y15 and the per-amino-acid rows AllAs, CllCs, ..., YllYs. | |
| 673 # NOTE(review): apart from ColumnN, delete this shim once nothing later in the script reads them. | |
| 674 for (position in 1:15){ | |
| 675   assign(paste0("Column",position),substrates[,position]) | |
| 676   assign(paste0("spaces",position),sum(substrates[,position] %in% "")) | |
| 677   for (aaRow in 1:20){ | |
| 678     assign(paste0(PositionAAOrder[aaRow],position),PositionTable[[aaRow,position]]) | |
| 679   } | |
| 680 } | |
| 681 for (aaRow in 1:20){ | |
| 682   rowCounts<-PositionTable[aaRow,,drop = FALSE] | |
| 683   colnames(rowCounts)<-paste0(PositionAAOrder[aaRow],1:15) | |
| 684   assign(paste0(PositionAAOrder[aaRow],"ll",PositionAAOrder[aaRow],"s"),rowCounts) | |
| 685 } | |
| 1033 # Endogenous probability matrix (EPM): amino-acid counts per position (PositionTable) over the substrate background frequency means. EPMtable starts as a copy of PositionTable and is overwritten below; the bare dim() call does not store its value. | |
| 1034 dim(PositionTable) | |
| 1035 EPMtable<-PositionTable | |
| 1036 # EPMtable[1,]<-(PositionTable[1,]/(PositionTable[1,]*.01*Amean)) | |
| 1037 # EPMtable[2,]<-(PositionTable[2,]/(PositionTable[2,]*.01*Cmean)) | |
| 1038 # EPMtable[3,]<-(PositionTable[3,]/(PositionTable[3,]*.01*Dmean)) | |
| 1039 # EPMtable[4,]<-(PositionTable[4,]/(PositionTable[4,]*.01*Emean)) | |
| 1040 # EPMtable[5,]<-(PositionTable[5,]/(PositionTable[5,]*.01*Fmean)) | |
| 1041 # EPMtable[6,]<-(PositionTable[6,]/(PositionTable[6,]*.01*Gmean)) | |
| 1042 # EPMtable[7,]<-(PositionTable[7,]/(PositionTable[7,]*.01*Hmean)) | |
| 1043 # EPMtable[8,]<-(PositionTable[8,]/(PositionTable[8,]*.01*Imean)) | |
| 1044 # EPMtable[9,]<-(PositionTable[9,]/(PositionTable[9,]*.01*Kmean)) | |
| 1045 # EPMtable[10,]<-(PositionTable[10,]/(PositionTable[10,]*.01*Lmean)) | |
| 1046 # EPMtable[11,]<-(PositionTable[11,]/(PositionTable[11,]*.01*Mmean)) | |
| 1047 # EPMtable[12,]<-(PositionTable[12,]/(PositionTable[12,]*.01*Nmean)) | |
| 1048 # EPMtable[13,]<-(PositionTable[13,]/(PositionTable[13,]*.01*Pmean)) | |
| 1049 # EPMtable[14,]<-(PositionTable[14,]/(PositionTable[14,]*.01*Qmean)) | |
| 1050 # EPMtable[15,]<-(PositionTable[15,]/(PositionTable[15,]*.01*Rmean)) | |
| 1051 # EPMtable[16,]<-(PositionTable[16,]/(PositionTable[16,]*.01*Smean)) | |
| 1052 # EPMtable[17,]<-(PositionTable[17,]/(PositionTable[17,]*.01*Tmean)) | |
| 1053 # EPMtable[18,]<-(PositionTable[18,]/(PositionTable[18,]*.01*Vmean)) | |
| 1054 # EPMtable[19,]<-(PositionTable[19,]/(PositionTable[19,]*.01*Wmean)) | |
| 1055 # EPMtable[20,]<-(PositionTable[20,]/(PositionTable[20,]*.01*Ymean)) | |
| 1056 | |
| 1057 # columns[p]: number of non-empty entries at motif position p. | |
| 1058 columns<-vapply(1:15,function(position){ | |
| 1059   length(substrates[,position])-sum(substrates[,position]=="") | |
| 1060 },integer(1)) | |
| 1061 | |
| 1062 # Fill EPMtable one position at a time: observed count over expected count, where the | |
| 1063 # expected count is columns[p] * 0.01 * AllMeans[amino acid]. A count of zero is | |
| 1064 # replaced by 1/columns[p] before dividing. | |
| 1065 for (position in 1:15){ | |
| 1066   observed<-PositionTable[1:20,position] | |
| 1067   expected<-columns[position]*.01*AllMeans[1:20] | |
| 1068   EPMtable[1:20,position]<-ifelse(observed>0,observed,1/columns[position])/expected | |
| 1069 } | |
| 1083 #here I created the endogenous probability matrix | |
| 1084 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs | |
| 1085 | |
| 1086 | |
| 1087 | |
| 1088 | |
| 1089 | |
| 1090 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1091 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1092 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1093 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1094 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1095 | |
| 1096 # Append the result tables to the two CSV outputs. Every write is comma-separated, | |
| 1097 # appended to the target file, and written without column names. | |
| 1098 AppendCsv<-function(x,file,row.names = FALSE) write.table(x,file = file,append = TRUE,sep = ",",row.names = row.names,col.names = FALSE) | |
| 1099 NormalizationScore<-c("Normalization Score",NormalizationScore) | |
| 1100 # FILENAME: the SD (z-score) table and the percent table, each under a title row. | |
| 1101 AppendCsv(c("SD Table"),FILENAME) | |
| 1102 AppendCsv(SDtable,FILENAME) | |
| 1103 AppendCsv(c("Percent Table"),FILENAME) | |
| 1104 AppendCsv(PercentTable,FILENAME) | |
| 1105 # Give the endogenous probability matrix the same header row and label column. | |
| 1106 HeaderSD<- -7:7 | |
| 1107 EPMtableu<-rbind(HeaderSD,EPMtable) | |
| 1108 rownames(EPMtableu)<-NULL | |
| 1109 EPMtableu<-data.frame(SetOfAAs,EPMtableu) | |
| 1110 # FILENAME2: site selectivity sheet (with row names), the EPM, then the normalization score. | |
| 1111 AppendCsv("Site Selectivity Matrix",FILENAME2) | |
| 1112 head<-matrix(data = rep(" ",times = 16),nrow = 1) | |
| 1113 SelectivityHeader<-rbind(head,matrix(data = c("Position",-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7),nrow = 1)) | |
| 1114 AppendCsv(SelectivityHeader,FILENAME2) | |
| 1115 AppendCsv(SelectivitySheet,FILENAME2,row.names = TRUE) | |
| 1116 AppendCsv(c("Endogenous Probability Matrix"),FILENAME2) | |
| 1117 AppendCsv(EPMtableu,FILENAME2) | |
| 1118 AppendCsv(NormalizationScore,FILENAME2) | |
| 1120 | |
| 1121 | |
| 1122 | |
| 1123 | |
| 1124 | |
| 1125 | |
| 1126 | |
| 1127 | |
| 1128 | |
| 1129 | |
| 1130 | |
| 1131 | |
| 1132 | |
| 1133 | |
| 1134 | |
| 1135 | |
| 1136 | |
| 1137 | |
| 1138 | |
| 1139 | |
| 1140 | |
| 1141 | |
| 1142 | |
| 1143 | |
| 1144 | |
| 1145 | |
| 1146 | |
| 1147 | |
| 1148 | |
| 1149 | |
| 1150 #test myself: this script should take in amino acids for each of the 9 positions and give out every single combination of those AAs | |
| 1151 | |
| 1152 #need to do following: fix it so that the accession numbers stay with the substrates, | |
| 1153 #also the neg false constant is totally unphosphorylated Ys found by FASTA-2-CSV system# uniprot | |
| 1154 | |
| 1155 #HOW MANY: IF THERE'S two aas in each position you get 2^9, so I assume the numbers are: | |
| 1156 #(number in position-4)*(number in position -3)*(number in position -2)...=total | |
| 1157 # require(rJava) | |
| 1158 # require(xlsxjars) | |
| 1159 # require(xlsx) | |
| 1160 # # require(readxl) | |
| 1161 | |
| 1162 # bareSDs: SDtable without its header row and label column (rows 2-21, columns 2-16); goodones flags the entries above 2. Debug aid: View(SDtable) | |
| 1163 bareSDs<-SDtable[2:21,2:16] | |
| 1164 goodones<-bareSDs>2 | |
| 1165 | |
| 1166 # Positionm7<-which(goodones[,1] %in% TRUE) | |
| 1167 # if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))} | |
| 1168 # Positionm6<-which(goodones[,2] %in% TRUE) | |
| 1169 # if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))} | |
| 1170 # Positionm5<-which(goodones[,3] %in% TRUE) | |
| 1171 # if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} | |
| 1172 # Positionm4<-which(goodones[,4] %in% TRUE) | |
| 1173 # if (length(Positionm4)<2){Positionm4<-bareSDs[,4][order(bareSDs[,4])[1:2]]} | |
| 1174 # Positionm3<-which(goodones[,5] %in% TRUE) | |
| 1175 # if (length(Positionm3)<2){Positionm3<-bareSDs[,5][order(bareSDs[,5])[1:2]]} | |
| 1176 # Positionm2<-which(goodones[,6] %in% TRUE) | |
| 1177 # if (length(Positionm2)<2){Positionm2<-bareSDs[,6][order(bareSDs[,6])[1:2]]} | |
| 1178 # Positionm1<-which(goodones[,7] %in% TRUE) | |
| 1179 # if (length(Positionm1)<2){Positionm1<-bareSDs[,7][order(bareSDs[,7])[1:2]]} | |
| 1180 # | |
| 1181 # Positiond0<-which(goodones[,8] %in% TRUE) | |
| 1182 # if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} | |
| 1183 # | |
| 1184 # Positionp1<-which(goodones[,9] %in% TRUE) | |
| 1185 # if (length(Positionp1)<2){Positionp1<-bareSDs[,9][order(bareSDs[,9])[1:2]]} | |
| 1186 # Positionp2<-which(goodones[,10] %in% TRUE) | |
| 1187 # if (length(Positionp2)<2){Positionp2<-bareSDs[,10][order(bareSDs[,10])[1:2]]} | |
| 1188 # Positionp3<-which(goodones[,11] %in% TRUE) | |
| 1189 # if (length(Positionp3)<2){Positionp3<-bareSDs[,11][order(bareSDs[,11])[1:2]]} | |
| 1190 # Positionp4<-which(goodones[,12] %in% TRUE) | |
| 1191 # if (length(Positionp4)<2){Positionp4<-bareSDs[,12][order(bareSDs[,12])[1:2]]} | |
| 1192 # Positionp5<-which(goodones[,13] %in% TRUE) | |
| 1193 # if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} | |
| 1194 # Positionp6<-which(goodones[,14] %in% TRUE) | |
| 1195 # if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))} | |
| 1196 # Positionp7<-which(goodones[,15] %in% TRUE) | |
| 1197 # if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} | |
| 1198 | |
| 1199 | |
| 1200 | |
| 1201 | |
| 1202 # Positionm7<-which(goodones[,1] %in% TRUE) | |
| 1203 # if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))} | |
| 1204 # Positionm6<-which(goodones[,2] %in% TRUE) | |
| 1205 # if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))} | |
| 1206 # Positionm5<-which(goodones[,3] %in% TRUE) | |
| 1207 # if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} | |
| 1208 # Positionm4<-which(goodones[,4] %in% TRUE) | |
| 1209 # if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))} | |
| 1210 # Positionm3<-which(goodones[,5] %in% TRUE) | |
| 1211 # if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))} | |
| 1212 # Positionm2<-which(goodones[,6] %in% TRUE) | |
| 1213 # if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))} | |
| 1214 # Positionm1<-which(goodones[,7] %in% TRUE) | |
| 1215 # if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))} | |
| 1216 # | |
| 1217 # Positiond0<-which(goodones[,8] %in% TRUE) | |
| 1218 # if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} | |
| 1219 # | |
| 1220 # Positionp1<-which(goodones[,9] %in% TRUE) | |
| 1221 # if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))} | |
| 1222 # Positionp2<-which(goodones[,10] %in% TRUE) | |
| 1223 # if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))} | |
| 1224 # Positionp3<-which(goodones[,11] %in% TRUE) | |
| 1225 # if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))} | |
| 1226 # Positionp4<-which(goodones[,12] %in% TRUE) | |
| 1227 # if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))} | |
| 1228 # Positionp5<-which(goodones[,13] %in% TRUE) | |
| 1229 # if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} | |
| 1230 # Positionp6<-which(goodones[,14] %in% TRUE) | |
| 1231 # if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))} | |
| 1232 # Positionp7<-which(goodones[,15] %in% TRUE) | |
| 1233 # if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} | |
| 1234 | |
| 1235 # SelectPositionRows(column): candidate amino acids for one motif position, returned | |
| 1236 # as row indices (1..20) into bareSDs. All residues with a z-score above 2 are kept; | |
| 1237 # when fewer than minimumHits qualify, the fallbackCount highest-scoring residues | |
| 1238 # are used instead (in ascending score order, as before). | |
| 1239 # Changes from the previous inline code: | |
| 1240 # - the fallback takes indices straight from order(); looking the top values up with | |
| 1241 #   match() returned the first matching row twice whenever the two scores were tied; | |
| 1242 # - NA/NaN scores are dropped (na.last = NA) instead of being sorted last and picked; | |
| 1243 # - the stray top-level match() call, whose result was never used, is removed. | |
| 1244 SelectPositionRows<-function(column,minimumHits = 3,fallbackCount = 2){ | |
| 1245   scores<-bareSDs[,column] | |
| 1246   enriched<-which(scores>2) | |
| 1247   if (length(enriched)>=minimumHits){ | |
| 1248     return(enriched) | |
| 1249   } | |
| 1250   utils::tail(order(scores,na.last = NA),fallbackCount) | |
| 1251 } | |
| 1252 Positionm7<-SelectPositionRows(1) | |
| 1253 Positionm6<-SelectPositionRows(2) | |
| 1254 Positionm5<-SelectPositionRows(3) | |
| 1255 Positionm4<-SelectPositionRows(4) | |
| 1256 Positionm3<-SelectPositionRows(5) | |
| 1257 Positionm2<-SelectPositionRows(6) | |
| 1258 Positionm1<-SelectPositionRows(7) | |
| 1259 # Position 0 has no fallback: it keeps only residues whose z-score exceeds 2. | |
| 1260 Positiond0<-which(goodones[,8] %in% TRUE) | |
| 1261 Positionp1<-SelectPositionRows(9) | |
| 1262 Positionp2<-SelectPositionRows(10) | |
| 1263 Positionp3<-SelectPositionRows(11) | |
| 1264 Positionp4<-SelectPositionRows(12) | |
| 1265 Positionp5<-SelectPositionRows(13) | |
| 1266 Positionp6<-SelectPositionRows(14) | |
| 1267 Positionp7<-SelectPositionRows(15) | |
| 1269 | |
| 1270 #turn the residue indices chosen for each position (1-20) into one-letter amino acid codes | |
| 1271 aa_props2 <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y") | |
| 1272 names(aa_props2) <- as.character(seq_along(aa_props2)) | |
| 1273 IndexToResidue <- function(ResidueIndex) aa_props2[ResidueIndex] | |
| 1274 Positionm7 <- sapply(Positionm7, IndexToResidue) | |
| 1275 Positionm6 <- sapply(Positionm6, IndexToResidue) | |
| 1276 Positionm5 <- sapply(Positionm5, IndexToResidue) | |
| 1277 Positionm4 <- sapply(Positionm4, IndexToResidue) | |
| 1278 Positionm3 <- sapply(Positionm3, IndexToResidue) | |
| 1279 Positionm2 <- sapply(Positionm2, IndexToResidue) | |
| 1280 Positionm1 <- sapply(Positionm1, IndexToResidue) | |
| 1281 Positiond0 <- sapply(Positiond0, IndexToResidue) | |
| 1282 Positionp1 <- sapply(Positionp1, IndexToResidue) | |
| 1283 Positionp2 <- sapply(Positionp2, IndexToResidue) | |
| 1284 Positionp3 <- sapply(Positionp3, IndexToResidue) | |
| 1285 Positionp4 <- sapply(Positionp4, IndexToResidue) | |
| 1286 Positionp5 <- sapply(Positionp5, IndexToResidue) | |
| 1287 Positionp6 <- sapply(Positionp6, IndexToResidue) | |
| 1288 Positionp7 <- sapply(Positionp7, IndexToResidue) | |
| 1289 | |
| 1290 | |
| 1291 # Positionm7<-c("D","H","N","V") | |
| 1292 # Positionm6<-c("E","V") | |
| 1293 # Positionm5<-c("D","H") | |
| 1294 # Positionm4<-c("D","N") | |
| 1295 # Positionm3<-c("D","E","F","Q") | |
| 1296 # Positionm2<-c("D","N","Q","S") | |
| 1297 # Positionm1<-c("F","I","L") | |
| 1298 # Positiond0<-c("Y") | |
| 1299 # Positionp1<-c("A","E") | |
| 1300 # Positionp2<-c("T","S","Q","E") | |
| 1301 # Positionp3<-c("V") | |
| 1302 # Positionp4<-c("K") | |
| 1303 # Positionp5<-c("K") | |
| 1304 # Positionp6<-c("K") | |
| 1305 # Positionp7<-c("R") | |
| 1306 #this is where the amino acids for each position are given. m means minus, p means plus | |
| 1307 ######################################## | |
| 1308 # ScreenerFilename<-"C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls" | |
| 1309 | |
| 1310 | |
| 1311 | |
| 1312 | |
| 1313 | |
| 1314 | |
| 1315 | |
| 1316 | |
| 1317 | |
| 1318 | |
| 1319 | |
| 1320 screaner <- read.csv(file = ScreenerFilename, stringsAsFactors = FALSE, header = FALSE) | |
| 1321 #each kinase panel is a run of 24 consecutive screener rows; the row just before each run is not used | |
| 1322 KinasePanel <- function(FirstRow) screaner[FirstRow:(FirstRow + 23), ] | |
| 1323 AKT <- KinasePanel(2) | |
| 1324 AMPK <- KinasePanel(27) | |
| 1325 CDK1 <- KinasePanel(52) | |
| 1326 CHK1 <- KinasePanel(77) | |
| 1327 CK2A1 <- KinasePanel(102) | |
| 1328 LKB1 <- KinasePanel(127) | |
| 1329 MAPK3 <- KinasePanel(152) | |
| 1330 MAPK13 <- KinasePanel(177) | |
| 1331 MTOR <- KinasePanel(202) | |
| 1332 # Pyk2<-screaner[227:250,] | |
| 1333 # Src<-screaner[252:275,] | |
| 1334 # Syk<-screaner[277:300,] | |
| 1335 # Yes<-screaner[302:325,] | |
| 1336 | |
| 1337 #two questions: first, why are we doing CDK1 when we already have a bioinformatics page about it? | |
| 1338 #second, I reran everything and only get 96 positions of interest in the SD table | |
| 1339 | |
| 1340 | |
| 1341 | |
| 1342 | |
| 1343 | |
| 1344 | |
| 1345 | |
| 1346 | |
| 1347 | |
| 1348 #Do_You_want_An_Excel_Output_Questionmark<-"NO" | |
| 1349 GeneratedPeptidesFile<-"GeneratedPeptidesFile.csv" | |
| 1350 | |
| 1351 | |
| 1352 # AKT<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 4) | |
| 1353 # AMPK<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 5) | |
| 1354 # CDK1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 6) | |
| 1355 # CHK1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 7) | |
| 1356 # CK2A1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 8) | |
| 1357 # LKB1<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 9) | |
| 1358 # MAPK3<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 10) | |
| 1359 # MAPK13<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 11) | |
| 1360 # MTOR<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 12) | |
| 1361 # Pyk2<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 13) | |
| 1362 # Src<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 14) | |
| 1363 # Syk<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 15) | |
| 1364 # Yes<-read.xlsx("C:/Users/User Name/Desktop/UMN 2017 Spring/Parker Lab/Kinatest ID/Screener.xls", sheetIndex = 16) | |
| 1365 # | |
| 1366 #one-letter residue codes mapped to the numeric codes 1-20; "xY" shares the code of Y and "O" gets the extra code 21 | |
| 1367 #NOTE(review): these assignments create globals named T and F, which mask R's built-in TRUE/FALSE shorthands for the rest of the script | |
| 1368 A <- 1 | |
| 1369 C <- 2 | |
| 1370 D <- 3 | |
| 1371 E <- 4 | |
| 1372 F <- 5 | |
| 1373 G <- 6 | |
| 1374 H <- 7 | |
| 1375 I <- 8 | |
| 1376 K <- 9 | |
| 1377 L <- 10 | |
| 1378 M <- 11 | |
| 1379 N <- 12 | |
| 1380 P <- 13 | |
| 1381 Q <- 14 | |
| 1382 R <- 15 | |
| 1383 S <- 16 | |
| 1384 T <- 17 | |
| 1385 V <- 18 | |
| 1386 W <- 19 | |
| 1387 Y <- 20 | |
| 1388 | |
| 1389 aa_props <- c(A = A, C = C, D = D, E = E, F = F, G = G, H = H, I = I, K = K, L = L, M = M, N = N, P = P, Q = Q, R = R, | |
| 1390 S = S, T = T, V = V, W = W, Y = Y, xY = Y, O = 21) | |
| 1391 ResidueToIndex <- function(Residue) aa_props[Residue] | |
| 1392 number15 <- sapply(Positionm7, ResidueToIndex) | |
| 1393 number14 <- sapply(Positionm6, ResidueToIndex) | |
| 1394 number13 <- sapply(Positionm5, ResidueToIndex) | |
| 1395 number1 <- sapply(Positionm4, ResidueToIndex) | |
| 1396 number2 <- sapply(Positionm3, ResidueToIndex) | |
| 1397 number3 <- sapply(Positionm2, ResidueToIndex) | |
| 1398 number4 <- sapply(Positionm1, ResidueToIndex) | |
| 1399 number5 <- sapply(Positiond0, ResidueToIndex) | |
| 1400 number6 <- sapply(Positionp1, ResidueToIndex) | |
| 1401 number7 <- sapply(Positionp2, ResidueToIndex) | |
| 1402 number8 <- sapply(Positionp3, ResidueToIndex) | |
| 1403 number9 <- sapply(Positionp4, ResidueToIndex) | |
| 1404 number10 <- sapply(Positionp5, ResidueToIndex) | |
| 1405 number11 <- sapply(Positionp6, ResidueToIndex) | |
| 1406 number12 <- sapply(Positionp7, ResidueToIndex) | |
| 1407 | |
| 1408 # number1<-Positionm4 | |
| 1409 # number2<-Positionm3 | |
| 1410 # number3<-Positionm2 | |
| 1411 # number4<-Positionm1 | |
| 1412 # number5<-Positiond0 | |
| 1413 # number6<-Positionp1 | |
| 1414 # number7<-Positionp2 | |
| 1415 # number8<-Positionp3 | |
| 1416 # number9<-Positionp4 | |
| 1417 | |
| 1418 ############################# | |
| 1419 ######################################## | |
| 1420 | |
| 1421 | |
| 1422 #enumerate every combination of the candidate residues, one residue per position from -7 to +7 | |
| 1423 #letters go to GeneratedPeptides and the matching numeric codes to NumeratedPeptides; rows are ordered with position +7 changing fastest | |
| 1424 PositionSets <- list(Positionm7, Positionm6, Positionm5, Positionm4, Positionm3, Positionm2, Positionm1, Positiond0, | |
| 1425 Positionp1, Positionp2, Positionp3, Positionp4, Positionp5, Positionp6, Positionp7) | |
| 1426 NumberSets <- list(number15, number14, number13, number1, number2, number3, number4, number5, | |
| 1427 number6, number7, number8, number9, number10, number11, number12) | |
| 1428 total <- prod(lengths(PositionSets)) | |
| 1429 #total is the expected number of generated peptides; it is used below to double-check the enumeration | |
| 1430 | |
| 1431 #expand.grid varies its first argument fastest, so the index sets are passed in reverse (+7 first) and the columns flipped back to -7..+7 | |
| 1432 #seq_along keeps an empty candidate set from producing the bogus indices 1 and 0 that 1:length() would give | |
| 1433 ChoiceGrid <- expand.grid(rev(lapply(PositionSets, seq_along)), KEEP.OUT.ATTRS = FALSE)[, 15:1, drop = FALSE] | |
| 1434 PickColumns <- function(Sets) unlist(lapply(1:15, function(k) Sets[[k]][ChoiceGrid[[k]]]), use.names = FALSE) | |
| 1435 GeneratedPeptides <- matrix(as.character(PickColumns(PositionSets)), ncol = 15) | |
| 1436 NumeratedPeptides <- matrix(as.numeric(PickColumns(NumberSets)), ncol = 15) | |
| 1437 count <- nrow(GeneratedPeptides) | |
| 1438 stopifnot(count == total) | |
| 1482 #################################################################### | |
| 1483 #now here I use the Endogenous Probabilty matrix from the previous script, which is called EMPtable | |
| 1484 #to score the created peptides | |
| 1485 ThisKinTable<-EPMtableu#[1:nrow(SDtable),] | |
| 1486 TKTcolumn<-c(data=rep(1,times=21)) | |
| 1487 TKTcolumn<-as.matrix(TKTcolumn,ncol=1) | |
| 1488 ThisKinTable<-cbind(TKTcolumn,ThisKinTable) | |
| 1489 | |
| 1490 ThisKinGeneratedScores<-rep(NA,times=nrow(GeneratedPeptides)) | |
| 1491 ThisKinGenWeirdScore<-rep(NA,times=nrow(GeneratedPeptides)) | |
| 1492 | |
| 1493 for (x in 1:nrow(GeneratedPeptides)){ | |
| 1494 Scoringpeptide<-NumeratedPeptides[x,1:15] | |
| 1495 Scoringpeptide<-Scoringpeptide+1 | |
| 1496 ThisKinTableScore<-as.numeric(ThisKinTable[Scoringpeptide[1],3])*ThisKinTable[as.numeric(Scoringpeptide[2]),4]*ThisKinTable[as.numeric(Scoringpeptide[3]),5]* | |
| 1497 ThisKinTable[as.numeric(Scoringpeptide[4]),6]*ThisKinTable[as.numeric(Scoringpeptide[5]),7]*ThisKinTable[as.numeric(Scoringpeptide[6]),8]*ThisKinTable[as.numeric(Scoringpeptide[7]),9]* | |
| 1498 #ThisKinTable[as.numeric(Scoringpeptide[8]),10]* | |
| 1499 ThisKinTable[as.numeric(Scoringpeptide[9]),11]*ThisKinTable[as.numeric(Scoringpeptide[10]),12]*ThisKinTable[as.numeric(Scoringpeptide[11]),13]* | |
| 1500 ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17] | |
| 1501 ThisKinGeneratedScores[x]<-ThisKinTableScore | |
| 1502 ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2]))) | |
| 1503 ThisKinGenWeirdScore[x]<-ThisKinTableScore | |
| 1504 } | |
| 1505 | |
| 1506 #score every generated peptide against each off-target kinase table taken from the screener | |
| 1507 #a peptide's score is the product of the table entries picked by its residue code (row) at positions 1-7 and 9-15 | |
| 1508 #position p reads table column p + 1; column 8 of the peptide (the d0 position) is not scored | |
| 1509 #the products are taken over whole columns at once, in the same left-to-right order, which also copes with an empty peptide list | |
| 1510 ScoredPositions <- c(1:7, 9:15) | |
| 1511 ScoreAgainstKinase <- function(KinaseTable) { | |
| 1512 PositionFactors <- lapply(ScoredPositions, function(p) KinaseTable[NumeratedPeptides[, p], p + 1]) | |
| 1513 Reduce(function(RunningProduct, Factor) RunningProduct * Factor, PositionFactors) | |
| 1514 } | |
| 1515 | |
| 1516 AKTGeneratedScores <- ScoreAgainstKinase(AKT) | |
| 1517 AMPKGeneratedScores <- ScoreAgainstKinase(AMPK) | |
| 1518 CDK1GeneratedScores <- ScoreAgainstKinase(CDK1) | |
| 1519 CHK1GeneratedScores <- ScoreAgainstKinase(CHK1) | |
| 1520 CK2A1GeneratedScores <- ScoreAgainstKinase(CK2A1) | |
| 1521 LKB1GeneratedScores <- ScoreAgainstKinase(LKB1) | |
| 1522 MAPK3GeneratedScores <- ScoreAgainstKinase(MAPK3) | |
| 1523 MAPK13GeneratedScores <- ScoreAgainstKinase(MAPK13) | |
| 1524 MTORGeneratedScores <- ScoreAgainstKinase(MTOR) | |
| 1525 #Pyk2, Src, Syk and Yes are not scored in this script | |
| 1588 | |
| 1589 | |
| 1590 | |
| 1591 AKTNorm<-1/as.numeric(AKT[22,1]) | |
| 1592 AKTThresh<-as.numeric(AKT[24,1]) | |
| 1593 AKTTrueThresh<-((AKTThresh*AKTNorm)/(100-AKTThresh)) | |
| 1594 AKTActive<-unlist(AKTGeneratedScores)>AKTTrueThresh | |
| 1595 if (TodaysKinase=="AKT"){AKTActive<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1596 | |
| 1597 AMPKNorm<-1/as.numeric(AMPK[22,1]) | |
| 1598 AMPKThresh<-as.numeric(AMPK[24,1]) | |
| 1599 AMPKTrueThresh<-((AMPKThresh*AMPKNorm)/(100-AMPKThresh)) | |
| 1600 AMPKActive<-unlist(AMPKGeneratedScores)>AMPKTrueThresh | |
| 1601 if (TodaysKinase=="AMPK"){AMPKActive<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1602 | |
| 1603 CDK1Norm<-1/as.numeric(CDK1[22,1]) | |
| 1604 CDK1Thresh<-as.numeric(CDK1[24,1]) | |
| 1605 CDK1TrueThresh<-((CDK1Thresh*CDK1Norm)/(100-CDK1Thresh)) | |
| 1606 CDK1Active<-unlist(CDK1GeneratedScores)>CDK1TrueThresh | |
| 1607 if (TodaysKinase=="CDK1"){CDK1Active<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1608 #CHK1: TrueThresh is the raw score S at which 100*S/(S+Norm) equals the percentage threshold stored in the panel | |
| 1609 CHK1Norm<-1/as.numeric(CHK1[22,1]) | |
| 1610 CHK1Thresh<-as.numeric(CHK1[24,1]) | |
| 1611 CHK1TrueThresh<-((CHK1Thresh*CHK1Norm)/(100-CHK1Thresh)) | |
| 1612 CHK1Active<-unlist(CHK1GeneratedScores)>CHK1TrueThresh #unlist() added to match the other kinase paragraphs | |
| 1613 if (TodaysKinase=="CHK1"){CHK1Active<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1614 | |
| 1615 CK2A1Norm<-1/as.numeric(CK2A1[22,1]) | |
| 1616 CK2A1Thresh<-as.numeric(CK2A1[24,1]) | |
| 1617 CK2A1TrueThresh<-((CK2A1Thresh*CK2A1Norm)/(100-CK2A1Thresh)) | |
| 1618 CK2A1Active<-unlist(CK2A1GeneratedScores)>CK2A1TrueThresh | |
| 1619 if (TodaysKinase=="CK2A1"){CK2A1Active<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1620 | |
| 1621 LKB1Norm<-1/as.numeric(LKB1[22,1]) | |
| 1622 LKB1Thresh<-as.numeric(LKB1[24,1]) | |
| 1623 LKB1TrueThresh<-((LKB1Thresh*LKB1Norm)/(100-LKB1Thresh)) | |
| 1624 LKB1Active<-unlist(LKB1GeneratedScores)>LKB1TrueThresh | |
| 1625 if (TodaysKinase=="LKB1"){LKB1Active<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1626 | |
| 1627 MAPK3Norm<-1/as.numeric(MAPK3[22,1]) | |
| 1628 MAPK3Thresh<-as.numeric(MAPK3[24,1]) | |
| 1629 MAPK3TrueThresh<-((MAPK3Thresh*MAPK3Norm)/(100-MAPK3Thresh)) | |
| 1630 MAPK3Active<-unlist(MAPK3GeneratedScores)>MAPK3TrueThresh | |
| 1631 if (TodaysKinase=="MAPK3"){MAPK3Active<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1632 | |
| 1633 MAPK13Norm<-1/as.numeric(MAPK13[22,1]) | |
| 1634 MAPK13Thresh<-as.numeric(MAPK13[24,1]) | |
| 1635 MAPK13TrueThresh<-((MAPK13Thresh*MAPK13Norm)/(100-MAPK13Thresh)) | |
| 1636 MAPK13Active<-unlist(MAPK13GeneratedScores)>MAPK13TrueThresh | |
| 1637 if (TodaysKinase=="MAPK13"){MAPK13Active<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1638 | |
| 1639 MTORNorm<-1/as.numeric(MTOR[22,1]) | |
| 1640 MTORThresh<-as.numeric(MTOR[24,1]) | |
| 1641 MTORTrueThresh<-((MTORThresh*MTORNorm)/(100-MTORThresh)) | |
| 1642 MTORActive<-unlist(MTORGeneratedScores)>MTORTrueThresh | |
| 1643 if (TodaysKinase=="MTOR"){MTORActive<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1644 | |
| 1645 # Pyk2Norm<-1/as.numeric(Pyk2[22,1]) | |
| 1646 # Pyk2Thresh<-as.numeric(Pyk2[24,1]) | |
| 1647 # Pyk2TrueThresh<-((Pyk2Thresh*Pyk2Norm)/(100-Pyk2Thresh)) | |
| 1648 # Pyk2Active<-unlist(Pyk2GeneratedScores)>Pyk2TrueThresh | |
| 1649 # if (TodaysKinase=="PYK2"){Pyk2Active<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1650 # | |
| 1651 # SrcNorm<-1/as.numeric(Src[22,1]) | |
| 1652 # SrcThresh<-as.numeric(Src[24,1]) | |
| 1653 # SrcTrueThresh<-((SrcThresh*SrcNorm)/(100-SrcThresh)) | |
| 1654 # SrcActive<-unlist(SrcGeneratedScores)>SrcTrueThresh | |
| 1655 # if (TodaysKinase=="SRC"){SrcActive<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1656 # | |
| 1657 # SykNorm<-1/as.numeric(Syk[22,1]) | |
| 1658 # SykThresh<-as.numeric(Syk[24,1]) | |
| 1659 # SykTrueThresh<-((SykThresh*SykNorm)/(100-SykThresh)) | |
| 1660 # SykActive<-unlist(SykGeneratedScores)>SykTrueThresh | |
| 1661 # if (TodaysKinase=="SYK"){SykActive<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1662 # | |
| 1663 # YesNorm<-1/as.numeric(Yes[22,1]) | |
| 1664 # YesThresh<-as.numeric(Yes[24,1]) | |
| 1665 # YesTrueThresh<-((YesThresh*YesNorm)/(100-YesThresh)) | |
| 1666 # YesActive<-unlist(YesGeneratedScores)>YesTrueThresh | |
| 1667 # if (TodaysKinase=="YES"){YesActive<-rep(0,times=nrow(GeneratedPeptides))} | |
| 1668 | |
| 1669 AllActive<-AKTActive+AMPKActive+CDK1Active+CHK1Active+CK2A1Active+LKB1Active+MAPK3Active+MAPK13Active+MTORActive | |
| 1670 | |
| 1671 Scores<-ThisKinGeneratedScores | |
| 1672 ThresholdValues<-ThisKinGenWeirdScore | |
| 1673 | |
| 1674 #join the 15 residues in each row of GeneratedPeptides into one motif string | |
| 1675 #apply() walks the rows itself, so a peptide matrix with no rows gives an empty result instead of failing on 1:nrow() | |
| 1676 FullMotifs<-apply(GeneratedPeptides,1,paste,collapse = "") | |
| 1680 | |
| 1681 PeptidesWithRanks<-cbind.data.frame(FullMotifs,GeneratedPeptides,Scores,ThresholdValues) | |
| 1682 PeptidesWithRanks<-cbind.data.frame(PeptidesWithRanks,AllActive,AKTActive,AMPKActive,CDK1Active,CHK1Active,CK2A1Active,LKB1Active,MAPK3Active,MAPK13Active,MTORActive) | |
| 1683 RanksPeptides<-PeptidesWithRanks[order(PeptidesWithRanks$ThresholdValues,decreasing = FALSE),] | |
| 1684 # PepRankHead<-c(1:9,"Sequence","RPMS","PMS") | |
| 1685 # RanksPeptides<-rbind.data.frame(PepRankHead,PeptidesWithRanks) | |
| 1686 #head(RanksPeptides) | |
| 1687 | |
| 1688 | |
| 1689 #now I have to score the negative sequences... for some reason | |
| 1690 #write up how we transfect with lipofectamine | |
| 1691 #3,4,5 questions | |
| 1692 | |
| 1693 #PAUSED HERE AT 4:50, HOPING THAT FIXING MINERVOTHING SO THAT LEFT SPACES WORK FIXES A THING. OTHERWISE | |
| 1694 #I MESSED WITH THE MCC TABLE AND NEED TO FINISH IT | |
| 1695 | |
| 1696 ThisKinBlanks<-rep(1,times=17) | |
| 1697 #indx <- sapply(breast, is.factor) | |
| 1698 #ThisKinTable[indx] <- lapply(ThisKinTable[indx], function(x) as.character(x)) | |
| 1699 ThisKinTable$SetOfAAs<-as.character(ThisKinTable$SetOfAAs) | |
| 1700 | |
| 1701 #ThisKinTest<-rbind.data.frame(ThisKinTable,ThisKinBlanks) | |
| 1702 ThisKinTable<-rbind.data.frame(ThisKinTable,ThisKinBlanks) | |
| 1703 | |
| 1704 NegativeScores<-rep(NA,times=nrow(NegativeSubstrateList)) | |
| 1705 NegativeWeirdScores<-rep(NA,times=nrow(NegativeSubstrateList)) | |
| 1706 for (v in 1:nrow(NegativeSubstrateList)) { | |
| 1707 motif<-NegativeSubstrateList[v,2] | |
| 1708 motif<-unlist(strsplit(motif,"")) | |
| 1709 #if (length(motif)<9){print(v)}} | |
| 1710 # motif[1] <- sapply(motif[1], function (x) aa_props[x]) | |
| 1711 # motif[2] <- sapply(motif[2], function (x) aa_props[x]) | |
| 1712 # motif[3] <- sapply(motif[3], function (x) aa_props[x]) | |
| 1713 # motif[4] <- sapply(motif[4], function (x) aa_props[x]) | |
| 1714 # motif[5] <- sapply(motif[5], function (x) aa_props[x]) | |
| 1715 # motif[6] <- sapply(motif[6], function (x) aa_props[x]) | |
| 1716 # motif[7] <- sapply(motif[7], function (x) aa_props[x]) | |
| 1717 # motif[8] <- sapply(motif[8], function (x) aa_props[x]) | |
| 1718 # motif[9] <- sapply(motif[9], function (x) aa_props[x]) | |
| 1719 motif<- gsub(" ","O",motif) | |
| 1720 motif <- sapply(motif, function (x) aa_props[x]) | |
| 1721 Scoringpeptide<-motif | |
| 1722 Scoringpeptide<-Scoringpeptide+1 | |
| 1723 ThisKinTableScore<-as.numeric(ThisKinTable[Scoringpeptide[1],3])*ThisKinTable[as.numeric(Scoringpeptide[2]),4]*ThisKinTable[as.numeric(Scoringpeptide[3]),5]* | |
| 1724 ThisKinTable[as.numeric(Scoringpeptide[4]),6]*ThisKinTable[as.numeric(Scoringpeptide[5]),7]*ThisKinTable[as.numeric(Scoringpeptide[6]),8]*ThisKinTable[as.numeric(Scoringpeptide[7]),9]* | |
| 1725 #ThisKinTable[as.numeric(Scoringpeptide[8]),10]* | |
| 1726 ThisKinTable[as.numeric(Scoringpeptide[9]),11]*ThisKinTable[as.numeric(Scoringpeptide[10]),12]*ThisKinTable[as.numeric(Scoringpeptide[11]),13]* | |
| 1727 ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17] | |
| 1728 NegativeScores[v]<-ThisKinTableScore | |
| 1729 ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2]))) | |
| 1730 NegativeWeirdScores[v]<-ThisKinTableScore*100 | |
| 1731 } | |
| 1732 | |
| 1733 negativesubstrates<-NegativeSubstrateList[,2] | |
| 1734 NegativeWithScores<-cbind(negativesubstrates,as.character(NegativeScores),as.character(NegativeWeirdScores)) | |
| 1735 | |
| 1736 | |
| 1737 #NEED TO HAVE THE NEGATIVE SUBSTRATES BE OUTPUTTED | |
| 1738 | |
| 1739 PositiveScores<-rep(NA,times=nrow(ImportedSubstrateList)) | |
| 1740 PositiveWeirdScores<-rep(NA,times=nrow(ImportedSubstrateList)) | |
| 1741 | |
| 1742 for (v in 1:nrow(ImportedSubstrateList)) { | |
| 1743 motif<-ImportedSubstrateList[v,4:18] | |
| 1744 motif<-unlist(motif) | |
| 1745 motif<- gsub("^$","O",motif) | |
| 1746 motif <- sapply(motif, function (x) aa_props[x]) | |
| 1747 Scoringpeptide<-motif | |
| 1748 Scoringpeptide<-Scoringpeptide+1 | |
| 1749 ThisKinTableScore<-as.numeric(ThisKinTable[Scoringpeptide[1],3])*ThisKinTable[as.numeric(Scoringpeptide[2]),4]*ThisKinTable[as.numeric(Scoringpeptide[3]),5]* | |
| 1750 ThisKinTable[as.numeric(Scoringpeptide[4]),6]*ThisKinTable[as.numeric(Scoringpeptide[5]),7]*ThisKinTable[as.numeric(Scoringpeptide[6]),8]*ThisKinTable[as.numeric(Scoringpeptide[7]),9]* | |
| 1751 #ThisKinTable[as.numeric(Scoringpeptide[8]),10]* | |
| 1752 ThisKinTable[as.numeric(Scoringpeptide[9]),11]*ThisKinTable[as.numeric(Scoringpeptide[10]),12]*ThisKinTable[as.numeric(Scoringpeptide[11]),13]* | |
| 1753 ThisKinTable[as.numeric(Scoringpeptide[12]),14]*ThisKinTable[as.numeric(Scoringpeptide[13]),15]*ThisKinTable[as.numeric(Scoringpeptide[14]),16]*ThisKinTable[as.numeric(Scoringpeptide[15]),17] | |
| 1754 | |
| 1755 PositiveScores[v]<-ThisKinTableScore | |
| 1756 ThisKinTableScore<-(ThisKinTableScore/(ThisKinTableScore+1/as.numeric(NormalizationScore[2]))) | |
| 1757 PositiveWeirdScores[v]<-ThisKinTableScore*100 | |
| 1758 } | |
| 1759 | |
| 1760 positivesubstrates<-ImportedSubstrateList[,4:18] | |
| 1761 positivewithscores<-cbind.data.frame(positivesubstrates,PositiveScores,PositiveWeirdScores) | |
| 1762 | |
| 1763 | |
| 1764 #write down the transient transfection SOP and what we will be doing with them | |
| 1765 #write down the vector names I will be using | |
| 1766 #write down something about transforming bacteria and with what | |
| 1767 | |
| 1768 #90% whatevernness | |
| 1769 # TPninetyone<-length(PositiveWeirdScores[PositiveWeirdScores>=0.91]) | |
| 1770 # Senseninetyone<-TPninetyone/nrow(positivesubstrates) | |
| 1771 # | |
| 1772 # TNninetyone<-length(NegativeWeirdScores[NegativeWeirdScores<91]) | |
| 1773 # Specninetyone<-TNninetyone/100 | |
| 1774 | |
| 1775 #create the MCC table | |
| 1776 | |
| 1777 threshold<-c(1:100,(1:9)/10,(1:9)/100,0,-.1) | |
| 1778 threshold<-threshold[order(threshold,decreasing = TRUE)] | |
| 1779 threshold | |
| 1780 | |
| 1781 Truepositives<-c(1:120) | |
| 1782 Falsenegatives<-c(1:120) | |
| 1783 Sensitivity<-c(1:120) | |
| 1784 TrueNegatives<-c(1:120) | |
| 1785 FalsePositives<-c(1:120) | |
| 1786 One_Minus_Specificity<-c(1:120) | |
| 1787 Accuracy<-c(1:120) | |
| 1788 MCC<-c(1:120) | |
| 1789 EER<-c(1:120) | |
| 1790 FalsePositiveRate<-c(1:120) | |
| 1791 Precision<-c(1:120) | |
| 1792 F_One_Half<-c(1:120) | |
| 1793 F_One<-c(1:120) | |
| 1794 F_Two<-c(1:120) | |
| 1795 | |
| 1796 #MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS | |
| 1797 | |
| 1798 for (z in seq_along(threshold)) { #one confusion matrix and its derived statistics per threshold value | |
| 1799 thres<-threshold[z] | |
| 1800 Truepositives[z]<-sum(PositiveWeirdScores>=thres,na.rm = TRUE) #sum() with na.rm leaves unscored (NA) sequences out; length(x[x>=thres]) counted every NA as a hit | |
| 1801 Falsenegatives[z]<-sum(PositiveWeirdScores<thres,na.rm = TRUE) | |
| 1802 Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z]) | |
| 1803 TrueNegatives[z]<-sum(NegativeWeirdScores<thres,na.rm = TRUE) | |
| 1804 # false positives are the scored negative sequences at or above the threshold | |
| 1805 FalsePositives[z]<-sum(NegativeWeirdScores>=thres,na.rm = TRUE) | |
| 1806 One_Minus_Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z])) | |
| 1807 Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z]) | |
| 1808 MCC[z]<-((Truepositives[z]*TrueNegatives[z])-(Falsenegatives[z]*FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z]))) | |
| 1809 EER[z]<-(FalsePositives[z]+Falsenegatives[z])/(Truepositives[z]+TrueNegatives[z]+FalsePositives[z]+Falsenegatives[z]) | |
| 1810 Precision[z]<-Truepositives[z]/(Truepositives[z]+FalsePositives[z]) | |
| 1811 F_One_Half[z]<-(1.25*Precision[z]*Sensitivity[z])/(.25*Precision[z]+Sensitivity[z]) #F-beta = (1+beta^2)*P*R/(beta^2*P+R), here beta = 0.5 | |
| 1812 F_One[z]<-(2*Precision[z]*Sensitivity[z])/(Precision[z]+Sensitivity[z]) #beta = 1; stored per threshold instead of overwriting the whole vector | |
| 1813 F_Two[z]<-(5*Precision[z]*Sensitivity[z])/(4*Precision[z]+Sensitivity[z]) #beta = 2; stored per threshold instead of overwriting the whole vector | |
| 1814 FalsePositiveRate[z]<-FalsePositives[z]/(TrueNegatives[z]+FalsePositives[z]) | |
| 1815 } | |
| 1816 Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,One_Minus_Specificity,MCC,EER,Precision,FalsePositiveRate,F_One_Half,F_One,F_Two) | |
| 1817 | |
# Label row for the scored positive sequences: the labels "1" to "15"
# (presumably the 15 motif positions -- confirm against where
# positivewithscores is built) followed by the two score columns. The labels
# are prepended as an ordinary first row rather than set as column names.
positiveheader <- c(as.character(1:15), "RPMS", "PMS")
positivewithscores <- rbind.data.frame(positiveheader, positivewithscores)

# The scored negative sequences get real column names instead.
negativeheader <- c("Substrate", "RPMS", "PMS")
colnames(NegativeWithScores) <- negativeheader
| 1823 | |
| 1824 # write.xlsx(NegativeWithScores,file = FILENAME, sheetName = "Negative Sequences Scored",col.names = TRUE,row.names = FALSE,append = TRUE) | |
| 1825 # write.xlsx(Characterization,file = FILENAME,sheetName = "Characterization Table",col.names = TRUE,row.names = FALSE,append = TRUE) | |
| 1826 # write.xlsx(RanksPeptides,file = FILENAME,sheetName = "Ranked Generated Peptides",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1827 # write.xlsx(positivewithscores,file = FILENAME, sheetName = "Positive Sequences Scored",col.names = FALSE,row.names = FALSE,append = TRUE) | |
# Append a one-cell title line to FILENAME2, then the characterization table.
# Both writes use append = TRUE, so anything already in the file is kept.
# NOTE(review): "Characterzation" is misspelled; it is left untouched here
# because the text is emitted into the output file.
write.table(
  x = c("Characterzation Table"),
  file = FILENAME2,
  sep = ",",
  append = TRUE,
  row.names = FALSE,
  col.names = FALSE
)

# The column names are pushed into the data frame as its first row, so the
# table is written with col.names = FALSE below.
header <- names(Characterization)
Characterization <- rbind.data.frame(header, Characterization)
write.table(
  Characterization,
  file = FILENAME2,
  sep = ",",
  append = TRUE,
  row.names = FALSE,
  col.names = FALSE
)
| 1832 | |
| 1833 # header<-colnames(RanksPeptides) | |
| 1834 # RanksPeptides<-rbind.data.frame(header,RanksPeptides) | |
# Write FILENAME3: a one-line explanatory banner followed by the RanksPeptides
# table with its column names.
#
# Both pieces go through a single connection opened in "w" mode, so the file
# is truncated exactly once and the table lands underneath the banner. Two
# separate write.table(file = FILENAME3, append = FALSE) calls would leave only
# the table, because the second call truncates what the first one wrote.
# The banner is written with col.names = FALSE so that no "x" header line is
# emitted above it.
# NOTE(review): the banner now actually reaches the file as its first line --
# confirm that anything parsing this output tolerates a leading title row.
ranks_connection <- file(FILENAME3, open = "w")
tryCatch(
  {
    write.table(
      x = "Off Target Kinase activity (your kinase of interest should have zeros here because it is ON-target)",
      file = ranks_connection,
      row.names = FALSE,
      col.names = FALSE,
      sep = ","
    )
    write.table(
      RanksPeptides,
      file = ranks_connection,
      row.names = FALSE,
      col.names = TRUE,
      sep = ","
    )
  },
  # Release the file handle even if one of the writes fails.
  finally = close(ranks_connection)
)
| 1837 | |
| 1838 | |
# Positive set: the imported substrate list with the positive scores attached
# as an extra column, written to FILENAME4 as comma-separated text. Missing
# values are written as empty fields. The score vector is passed to cbind()
# under its own name because that name becomes the column header.
PositivesWithScores <- as.data.frame(ImportedSubstrateList)
PositiveOutput <- cbind(PositivesWithScores, PositiveWeirdScores)
write.table(
  PositiveOutput,
  file = FILENAME4,
  sep = ",",
  na = "",
  row.names = FALSE
)

# Negative set: same layout, built from the negative substrate list and
# written to FILENAME5.
NegativeOutput <- cbind(as.data.frame(NegativeSubstrateList), NegativeWeirdScores)
write.table(
  NegativeOutput,
  file = FILENAME5,
  sep = ",",
  na = "",
  row.names = FALSE
)
