Mercurial > repos > jfb > kt_id_part1
comparison KT part 1/Kinatest-R part 1.R @ 0:27d804c7b43f draft default tip
Uploaded
| author | jfb |
|---|---|
| date | Sun, 16 Sep 2018 18:03:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:27d804c7b43f |
|---|---|
# ---- Run configuration and input loading ------------------------------------

# Remember the current warning level, then silence all warnings for this run.
# NOTE(review): `oldw` is not restored anywhere in the part of the script
# visible here -- confirm options(warn = oldw) is called further down.
oldw <- getOption("warn")
options(warn = -1)

# Input tables, read from fixed relative paths. The third table is read
# without a header row so its first line stays in the data.
ImportedSubstrateList <- read.csv("input1", stringsAsFactors = FALSE)
NegativeSubstrateList <- read.csv("input2", stringsAsFactors = FALSE)
SubstrateBackgroundFrequency <- read.csv(
  "input3",
  header = FALSE,
  stringsAsFactors = FALSE
)

# Fixed file names used by this script.
ScreenerFilename <- "screener"
FILENAME <- "output1"
FILENAME2 <- "output2"
FILENAME3 <- "output3"

# Transpose the background table, drop the first row of the transposed
# result, and remove rows with missing values. `Sub` is kept as a global
# because the original script defines it.
background_t <- t(SubstrateBackgroundFrequency)
Sub <- na.omit(background_t[2:nrow(background_t), ])
SubstrateBackgroundFrequency <- Sub

# First trailing command-line argument, stored as a string (NA when absent).
args <- commandArgs(trailingOnly = TRUE)
TodaysKinase <- as.character(args[1])

# Name constants for the result tables.
# NOTE(review): `SDtable` is reassigned to a numeric matrix further down this
# script, so the string assigned here is overwritten before the code visible
# in this file reads it.
OutputMatrix <- "KinaseMatrix.csv"
CharacterizationTable <- "CharacterizationTableForThisKinase.csv"
SDtable <- "SDtableforthisKinase"
SiteSelectivityTable <- "SiteSelectivityForThisKinase"
| 36 | |
| 37 | |
| 38 | |
# ---- Build the substrate residue matrix -------------------------------------
# `substrates` is a character matrix with one row per imported substrate and
# one column per motif position (15 in total), filled from columns 4-18 of
# ImportedSubstrateList. Row 1 of the import is skipped, so import row i is
# stored in matrix row i - 1.
# NOTE(review): row 1 is presumably a secondary header line -- confirm against
# the input file format.
n_substrates <- max(nrow(ImportedSubstrateList) - 1, 0)
substrates <- matrix(rep("A", times = n_substrates * 15), ncol = 15)

# seq_len() gives an empty loop when only the skipped row is present, whereas
# `2:nrow(x)` would count down (2, 1) and fail on an out-of-range row.
for (i in seq_len(n_substrates) + 1) {
  j <- i - 1
  substratemotif <- ImportedSubstrateList[i, 4:18]
  # The eighth of the fifteen motif positions is always forced to "Y".
  substratemotif[8] <- "Y"
  # Flatten the one-row data frame to a plain vector before storing it.
  substratemotif <- unlist(substratemotif)
  substrates[j, 1:15] <- substratemotif
}
| 51 | |
| 52 # SpacesToOs<-c(""="O",) | |
| 53 # substrates<-SpacesToOs[substrates] | |
| 54 | |
# ---- Background mean and SD per residue letter ------------------------------
# Column k + 1 of SubstrateBackgroundFrequency is paired with the k-th letter
# below (column 2 with "A", ..., column 21 with "Y").
background_aa <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
background_cols <- seq_along(background_aa) + 1

# Fail early with a clear message if the background table cannot supply the
# columns read below.
stopifnot(
  is.matrix(SubstrateBackgroundFrequency),
  nrow(SubstrateBackgroundFrequency) >= 1,
  ncol(SubstrateBackgroundFrequency) >= max(background_cols)
)

# Bare expression: shows the last row's value in column 2. It is auto-printed
# only when the script is evaluated at top level with printing enabled.
SubstrateBackgroundFrequency[nrow(SubstrateBackgroundFrequency), 2]

# Convert each background column to numeric once. Entries that do not parse
# become NA and are ignored by the statistics below.
background_values <- lapply(
  background_cols,
  function(column) as.numeric(SubstrateBackgroundFrequency[, column])
)

# Unnamed numeric vectors of length 20, in the letter order given above.
AllMeans <- vapply(background_values, mean, numeric(1), na.rm = TRUE)
AllSDs <- vapply(background_values, sd, numeric(1), na.rm = TRUE)

# Keep the per-letter globals (Amean ... Ymean, Asd ... Ysd) that the rest of
# the script refers to by name.
for (aa_index in seq_along(background_aa)) {
  assign(paste0(background_aa[aa_index], "mean"), AllMeans[aa_index])
  assign(paste0(background_aa[aa_index], "sd"), AllSDs[aa_index])
}

# 20 x 2 matrix: background mean and SD side by side.
SBF_statisticalvalues <- cbind(AllMeans, AllSDs)
| 106 | |
# ---- Per-position residue percentages ---------------------------------------
# For each of the 15 motif positions (columns of `substrates`) compute the
# share of every residue letter among the non-blank entries of that column.
# Row order of the resulting table:
# A C D E F G H I K L M N P Q R S T V W Y
percent_aa <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)
n_motif_positions <- 15
residue_fraction <- matrix(
  NA_real_,
  nrow = length(percent_aa),
  ncol = n_motif_positions
)

for (position_index in seq_len(n_motif_positions)) {
  position_residues <- substrates[, position_index]
  # Empty strings mark positions with no residue; they are excluded from the
  # denominator.
  blank_count <- sum(position_residues %in% "")
  filled_count <- length(position_residues) - blank_count

  # Keep the per-position globals Column1..Column15 and spaces1..spaces15.
  assign(paste0("Column", position_index), position_residues)
  assign(paste0("spaces", position_index), blank_count)

  for (aa_index in seq_along(percent_aa)) {
    share <- sum(position_residues %in% percent_aa[aa_index]) / filled_count
    residue_fraction[aa_index, position_index] <- share
    # Keep the per-cell globals A1..A15, C1..C15, ..., Y1..Y15.
    assign(paste0(percent_aa[aa_index], position_index), share)
  }
}

# Keep the per-letter 1 x 15 row matrices AllAs, CllCs, ..., YllYs, each with
# column names <letter>1..<letter>15.
for (aa_index in seq_along(percent_aa)) {
  aa <- percent_aa[aa_index]
  assign(
    paste0(aa, "ll", aa, "s"),
    matrix(
      residue_fraction[aa_index, ],
      nrow = 1,
      dimnames = list(NULL, paste0(aa, seq_len(n_motif_positions)))
    )
  )
}

# Stack the 20 rows. The column names are those of the first row
# (A1..A15), matching what rbind() of the row matrices produces.
PercentTable <- residue_fraction
colnames(PercentTable) <- paste0(percent_aa[1], seq_len(n_motif_positions))

# Convert fractions to percentages.
PercentTable <- PercentTable * 100
| 503 | |
# ---- Z-score table ----------------------------------------------------------
# For every row of PercentTable, subtract that row's background mean and
# divide by that row's background SD: (percentage - mean) / SD.
# AllMeans and AllSDs hold the 20 per-letter values in PercentTable row order.
SDtable <- sweep(PercentTable, 1, AllMeans, "-")
SDtable <- sweep(SDtable, 1, AllSDs, "/")

# The result is a plain 20 x 15 numeric matrix without row or column names.
dimnames(SDtable) <- NULL
| 529 | |
| 530 | |
# ---- Observed count of over-represented residues per position ---------------
# Row labels for the output tables: a header cell followed by the 20 residue
# letters in table row order, so row r of SDtable pairs with SetOfAAs[r + 1].
SetOfAAs <- c(
  "Letter",
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)

# For each motif position, add up how many substrates carry a residue whose
# z-score at that position exceeds 2.
SumOfSigmaAAs <- numeric(15)
for (position in seq_len(15)) {
  residues_here <- substrates[, position]
  enriched_total <- 0
  for (aa_row in seq_len(20)) {
    if (SDtable[aa_row, position] > 2) {
      enriched_total <- enriched_total +
        sum(residues_here == SetOfAAs[aa_row + 1])
    }
  }
  SumOfSigmaAAs[position] <- enriched_total
}
| 547 | |
# ---- Non-blank residue count at every motif position ------------------------
# Number of substrates that have a residue (not an empty string) at each
# column of `substrates`. The count covers every column, because the
# selectivity calculation that follows indexes AAsAtPositions for positions
# 1 to 15; a vector with only the first nine positions leaves positions
# 10-15 as NA.
AAsAtPositions <- vapply(
  seq_len(ncol(substrates)),
  function(position) {
    residues <- substrates[, position]
    length(residues) - sum(residues == "")
  },
  integer(1)
)

# Keep the per-position scalars (AAs1, AAs2, ...) available by name.
for (position in seq_along(AAsAtPositions)) {
  assign(paste0("AAs", position), AAsAtPositions[position])
}
| 561 | |
# Expected number of "enriched" residues per position: the summed AllMeans
# percentages of every amino acid whose SDtable value exceeds 2 there,
# applied to the number of non-blank residues at that position.
SumOfExpectedSigmaAAs <- c(1:15)

for (pos in 1:15) {
  expected_pct <- 0
  for (aa_row in 1:20) {
    if (SDtable[aa_row, pos] > 2) {
      expected_pct <- expected_pct + AllMeans[aa_row]
    }
  }
  # Cells equal to "" do not count as residues.
  filled <- length(substrates[, pos]) - sum(substrates[, pos] %in% "")
  SumOfExpectedSigmaAAs[pos] <- expected_pct * filled / 100
}
| 574 | |
| 575 #SelectivityRow<-SumOfSigmaAAs/SumOfExpectedSigmaAAs | |
| 576 | |
# Site selectivity per position:
#   (observed enriched / expected enriched) * (observed enriched / residues)
# Every operand is indexed by position so the pairing stays strictly
# element-wise: a vector with no entry for a position contributes NA there
# instead of being recycled.
motif_positions <- 1:15
enrichment_ratio <- SumOfSigmaAAs[motif_positions] /
  SumOfExpectedSigmaAAs[motif_positions]
enriched_fraction <- SumOfSigmaAAs[motif_positions] /
  AAsAtPositions[motif_positions]
SelectivityRow <- enrichment_ratio * enriched_fraction

# The argument names become the row labels that are written to the output.
SelectivitySheet <- rbind(SumOfSigmaAAs, SumOfExpectedSigmaAAs, SelectivityRow)
| 584 | |
# Put a header row of position labels on top of a table and a label column
# in front of it, returning the result as a data frame.
label_table <- function(values, position_labels, row_labels) {
  stacked <- rbind(position_labels, values)
  row.names(stacked) <- NULL
  data.frame(SetOfAAs = row_labels, stacked)
}

# Labels as a one-column matrix so they bind as a single leading column.
SetOfAAs <- matrix(data = SetOfAAs, ncol = 1)
# Motif positions relative to the central residue.
HeaderSD <- c(-7:7)

# Keep the unlabelled table under its own name before SDtable is replaced.
SDtableu <- SDtable
SDtable <- label_table(SDtableu, HeaderSD, SetOfAAs)

PercentTable <- label_table(PercentTable, HeaderSD, SetOfAAs)
# Numeric entries of one column of the background-frequency table; values
# that do not parse as numbers are discarded.
numeric_entries <- function(column_index) {
  parsed <- as.numeric(SubstrateBackgroundFrequency[, column_index])
  parsed[!is.na(parsed)]
}

# NOTE(review): going by the variable names, column 34 presumably holds
# tyrosine counts and column 35 phosphotyrosine counts -- confirm against the
# input file layout.
numberofY <- numeric_entries(34)
numberofPY <- numeric_entries(35)

# Ratio of the column-35 total to the column-34 total.
NormalizationScore <- sum(numberofPY) / sum(numberofY)
| 602 | |
| 603 # positions<-matrix(data = NA, nrow=20,ncol = 15) | |
| 604 # | |
| 605 # #column1 | |
| 606 # | |
| 607 # for (q in 1:15) { | |
| 608 # sA<-sum(substrates[,i]=="A") | |
| 609 # positions[1,i]<-sA | |
| 610 # sC<-sum(substrates[,i]=="C") | |
| 611 # positions[2,i]<-sC | |
| 612 # sD<-sum(substrates[,i]=="D") | |
| 613 # positions[3,i]<-sD | |
| 614 # sE<-sum(substrates[,i]=="E") | |
| 615 # positions[4,i]<-sE | |
| 616 # sF<-sum(substrates[,i]=="F") | |
| 617 # sG<-sum(substrates[,i]=="G") | |
| 618 # sH<-sum(substrates[,i]=="H") | |
| 619 # sI<-sum(substrates[,i]=="I") | |
| 620 # sK<-sum(substrates[,i]=="K") | |
| 621 # sL<-sum(substrates[,i]=="L") | |
| 622 # sM<-sum(substrates[,i]=="M") | |
| 623 # sN<-sum(substrates[,i]=="N") | |
| 624 # sP<-sum(substrates[,i]=="P") | |
| 625 # sQ<-sum(substrates[,i]=="Q") | |
| 626 # sR<-sum(substrates[,i]=="R") | |
| 627 # sS<-sum(substrates[,i]=="S") | |
| 628 # sT<-sum(substrates[,i]=="T") | |
| 629 # sV<-sum(substrates[,i]=="V") | |
| 630 # sW<-sum(substrates[,i]=="W") | |
| 631 # sY<-sum(substrates[,i]=="Y") | |
| 632 # positions[5,i]<-sF | |
| 633 # positions[6,i]<-sG | |
| 634 # positions[7,i]<-sH | |
| 635 # positions[8,i]<-sI | |
| 636 # positions[9,i]<-sK | |
| 637 # positions[10,i]<-sL | |
| 638 # positions[11,i]<-sM | |
| 639 # positions[12,i]<-sN | |
| 640 # positions[13,i]<-sP | |
| 641 # positions[14,i]<-sQ | |
| 642 # positions[15,i]<-sR | |
| 643 # positions[16,i]<-sS | |
| 644 # positions[17,i]<-sT | |
| 645 # positions[18,i]<-sV | |
| 646 # positions[19,i]<-sW | |
| 647 # positions[20,i]<-sY | |
| 648 # } | |
| 649 | |
# Position table: how many times each amino acid occurs at each motif
# position. It is the input to the endogenous probability matrix.
#
# The counts were previously spelled out as 300 separate scalar assignments
# (one per amino acid and position) inside an always-true `if`; they are now
# produced by iterating over the amino-acid alphabet. The resulting
# PositionTable has the same integer values, shape (20 x 15) and dimnames.

# Per-position residue vectors. Later code reads Column1..Column15 directly,
# so they stay available under these names.
Column1 <- substrates[, 1]
Column2 <- substrates[, 2]
Column3 <- substrates[, 3]
Column4 <- substrates[, 4]
Column5 <- substrates[, 5]
Column6 <- substrates[, 6]
Column7 <- substrates[, 7]
Column8 <- substrates[, 8]
Column9 <- substrates[, 9]
Column10 <- substrates[, 10]
Column11 <- substrates[, 11]
Column12 <- substrates[, 12]
Column13 <- substrates[, 13]
Column14 <- substrates[, 14]
Column15 <- substrates[, 15]

position_residues <- list(
  Column1, Column2, Column3, Column4, Column5,
  Column6, Column7, Column8, Column9, Column10,
  Column11, Column12, Column13, Column14, Column15
)

# Row order of the table.
amino_acid_order <- c(
  "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
  "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
)

# One column of counts per position. %in% never returns NA, so a missing
# cell simply counts as "not this residue".
PositionTable <- vapply(
  position_residues,
  function(residues) {
    vapply(
      amino_acid_order,
      function(aa) sum(residues %in% aa),
      integer(1),
      USE.NAMES = FALSE
    )
  },
  integer(length(amino_acid_order))
)

# Same dimnames as the hand-built table: no row names, columns A1..A15, so
# objects derived from PositionTable are unchanged.
dimnames(PositionTable) <- list(
  NULL,
  paste0("A", seq_along(position_residues))
)
# Endogenous probability matrix (EPM): the observed count of each amino acid
# at each position divided by the count expected from the background
# percentages in AllMeans.

# Top-level expression: its value is printed when the script is run
# non-interactively.
dim(PositionTable)

# Start from the raw counts; every cell is overwritten below.
EPMtable <- PositionTable

# Number of non-blank residues at each of the 15 positions.
columns <- vapply(
  list(
    Column1, Column2, Column3, Column4, Column5,
    Column6, Column7, Column8, Column9, Column10,
    Column11, Column12, Column13, Column14, Column15
  ),
  function(residues) length(residues) - sum(residues == ""),
  integer(1)
)

for (pos in 1:15) {
  for (aa_row in 1:20) {
    observed <- PositionTable[aa_row, pos]
    # Count expected if this amino acid occurred at its background rate
    # (AllMeans is a percentage, hence the .01).
    expected <- columns[pos] * .01 * AllMeans[aa_row]
    if (observed > 0) {
      EPMtable[aa_row, pos] <- observed / expected
    } else if (observed == 0) {
      # Amino acids never seen at this position get a pseudo-count of
      # 1 / (residues at this position) instead of zero.
      EPMtable[aa_row, pos] <- (1 / columns[pos]) / expected
    }
  }
}
| 1075 #here I created the endogenous probability matrix | |
| 1076 #now all I need to do is make the program automatically determine which SDs are >2, and then make it perform screener and sorter on those SDs | |
| 1077 | |
| 1078 | |
| 1079 | |
| 1080 | |
| 1081 | |
| 1082 # write.xlsx(SDtable,file=FILENAME, sheetName = "Standard Deviation Table",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1083 # write.xlsx(PercentTable,file = FILENAME,sheetName = "Percent Table",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1084 # write.xlsx(SelectivitySheet,file = FILENAME,sheetName = "Site Selectivity",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1085 # write.xlsx(EPMtable,file=FILENAME,sheetName = "Endogenous Probability Matrix",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1086 # write.xlsx(NormalizationScore,file = FILENAME,sheetName = "Normalization Score",col.names = FALSE,row.names = FALSE,append = TRUE) | |
| 1087 | |
# Prefix the score with its label; the labelled vector is written out later.
NormalizationScore <- c("Normalization Score", NormalizationScore)

# Append rows to the first output file as comma-separated values, without
# row or column names.
append_to_output1 <- function(rows) {
  write.table(
    rows,
    file = FILENAME,
    append = TRUE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE
  )
}

# Single banner row, 16 cells wide (same width as the tables written after
# it); only the first cell carries meaning.
sd_banner <- matrix(
  c(
    "SD Table", "please", "just", "ignore", "these", "words", "up", "here",
    "for", "now", "yo", "yo", "yo", "yo", "yo", "yo"
  ),
  nrow = 1
)

append_to_output1(sd_banner)
append_to_output1(SDtable)
append_to_output1(c("Percent Table"))
append_to_output1(PercentTable)
| 1096 | |
# Labelled copy of the endogenous probability matrix: position header row on
# top, amino-acid label column in front.
HeaderSD <- c(-7:7)
EPMtableu <- rbind(HeaderSD, EPMtable)
row.names(EPMtableu) <- NULL
EPMtableu <- data.frame(SetOfAAs, EPMtableu)

# Append rows to the second output file as comma-separated values without
# column names; row names are written only when requested.
append_to_output2 <- function(rows, with_row_names = FALSE) {
  write.table(
    rows,
    file = FILENAME2,
    append = TRUE,
    sep = ",",
    row.names = with_row_names,
    col.names = FALSE
  )
}

# Banner row; only the first cell carries meaning.
selectivity_banner <- matrix(
  c(
    "Selectivity", "just", "ignore", "these", "words", "up", "here", "for",
    "now", "yo", "yo", "yo", "yo", "yo", "yo"
  ),
  nrow = 1
)
append_to_output2(selectivity_banner)

# Two header rows: a row of blanks, then the position labels.
SelectivityHeader <- matrix(
  c(rep(" ", times = 16), "Position", -7:7),
  nrow = 2,
  byrow = TRUE
)
append_to_output2(SelectivityHeader)

# Row names are kept here: they identify the three selectivity rows.
append_to_output2(SelectivitySheet, with_row_names = TRUE)
append_to_output2(c("Endogenous Probability Matrix"))
append_to_output2(EPMtableu)
append_to_output2(NormalizationScore)

# Restore the warning level saved at the top of the script.
options(warn = oldw)
