Mercurial > repos > jfb > stcommon_and_dif
comparison ST CandD/test-data/CandD.R @ 0:1875d4b62014 draft default tip
Uploaded
| author | jfb |
|---|---|
| date | Wed, 27 Jun 2018 11:36:32 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1875d4b62014 |
|---|---|
# Substrate tables for the three data sets. Each file is read with its first
# row as the header, and text columns stay character vectors (no factors).
FirstSubstrateSet <- read.csv(file = "input1.csv", stringsAsFactors = FALSE)
SecondSubstrateSet <- read.csv(file = "input3.csv", stringsAsFactors = FALSE)
ThirdSubstrateSet <- read.csv(file = "input5.csv", stringsAsFactors = FALSE)

# Matching substrate-background-frequency tables. These are read WITHOUT a
# header row, so their first row is ordinary data.
Firstsubbackfreq <- read.csv(file = "input2.csv", header = FALSE, stringsAsFactors = FALSE)
Secondsubbackfreq <- read.csv(file = "input4.csv", header = FALSE, stringsAsFactors = FALSE)
Thirdsubbackfreq <- read.csv(file = "input6.csv", header = FALSE, stringsAsFactors = FALSE)
| 9 | |
| 10 | |
# Three positional command-line flags drive the script:
#   1. "YES" (all caps) to keep ONLY full motifs
#   2. "YES" (all caps) to keep ONLY truncated motifs
#   3. "YES" (all caps) to find the overlap, "NO" (all caps) to find the non-overlap
args <- commandArgs(trailingOnly = TRUE)

# A missing flag would otherwise surface later as the cryptic
# "missing value where TRUE/FALSE needed" when the flag is compared to "YES".
if (length(args) < 3) {
  stop(
    "Expected 3 arguments (full motifs only, truncated motifs only, ",
    "looking for commonality) but received ", length(args),
    call. = FALSE
  )
}

# Echo the flags so they show up in the run log.
print(args[1])
print(args[2])
print(args[3])

#If you want ONLY FULL MOTIFS, put "YES" here, please use all caps
FullMotifsOnly_questionmark <- args[1]
#If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps
TruncatedMotifsOnly_questionmark <- args[2]
#if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps)
Are_You_Looking_For_Commonality <- args[3]
| 24 | |
| 25 | |
# Output file names.

# Tables written when looking for commonality (the shared motifs).
Shared_motifs_table <- "sharedmotifs.csv"
Shared_subbackfreq_table <- "sharedSBF.csv"

# Earlier, dated names for the two shared tables (kept for reference):
# Shared_motifs_table<-"Shared motifs 7-27-17.csv"
# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"

# Per-set tables for the motifs that are NOT shared, one pair per input set.
First_unshared_motifs_table <- "R1 substrates.csv"
First_unshared_subbackfreq <- "R1 SBF.csv"

Second_unshared_motifs_table <- "R2 subs.csv"
Second_unshared_subbackfreq <- "R2 SBf.csv"

Third_unshared_motifs_table <- "R3 subs.csv"
Third_unshared_subbackfreq <- "R3 SBF.csv"
| 41 | |
| 42 #final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around | |
| 43 #other languages to see if any of them can do it. | |
| 44 #################################################################################################################################### | |
| 45 | |
| 46 | |
| 47 | |
| 48 | |
| 49 | |
# Prefix every value in column 11 of a substrate table with a lowercase "x".
# The motif-building code later locates this "x" with match() to count how many
# letters sit to its left and right, and strips it again before saving the motif.
#
#   SubstrateSet   - data frame holding one substrate per row
#   ResidueColumn  - index of the column to mark (11 in this script)
# Returns the data frame with that column replaced by the "x"-prefixed text.
MarkCentralResidue <- function(SubstrateSet, ResidueColumn = 11) {
  # rep() keeps both paste0() inputs the same length, so a table with zero rows
  # yields a zero-length column instead of a stray single "x".
  Marker <- rep("x", times = nrow(SubstrateSet))
  SubstrateSet[, ResidueColumn] <- paste0(Marker, SubstrateSet[, ResidueColumn])
  SubstrateSet
}

FirstSubstrateSet <- MarkCentralResidue(FirstSubstrateSet)
SecondSubstrateSet <- MarkCentralResidue(SecondSubstrateSet)
ThirdSubstrateSet <- MarkCentralResidue(ThirdSubstrateSet)
| 70 | |
| 71 | |
| 72 | |
| 73 | |
| 74 | |
| 75 | |
| 76 | |
| 77 | |
| 78 | |
| 79 | |
| 80 #################################################################################################################################### | |
| 81 #################################################################################################################################### | |
| 82 # better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two | |
| 83 # separate proteins thus two separate accession numbers? | |
| 84 # It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second | |
| 85 # accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number | |
| 86 #################################################################################################################################### | |
| 87 #################################################################################################################################### | |
| 88 #################################################################################################################################### | |
| 89 #################################################################################################################################### | |
| 90 | |
#Create the motif sets, deciding whether you're looking for full motifs, truncated motifs, or both,
#then find the motifs common to all three substrate sets and write the shared tables
if (Are_You_Looking_For_Commonality=="YES"){

  # Build the motif strings and accession numbers for one substrate table.
  #
  #   SubstrateSet   - data frame; columns 4:18 hold the motif cells (column 11
  #                    carries the "x" marker), column 3 the accession number
  #   KeepFull       - TRUE to record motifs with at least 7 letters on BOTH
  #                    sides of the marked residue
  #   KeepTruncated  - TRUE to record motifs with fewer than 7 letters on either
  #                    side; these are padded with spaces to 7 per side
  #
  # Returns list(Motifs, AccessionNumbers): two one-column matrices with one row
  # per substrate. Rows whose motif was not recorded stay NA.
  BuildMotifSet <- function(SubstrateSet, KeepFull, KeepTruncated) {
    RowCount <- nrow(SubstrateSet)
    Motifs <- matrix(NA, nrow = RowCount, ncol = 1)
    AccessionNumbers <- matrix(NA, nrow = RowCount, ncol = 1)

    # seq_len() so that an empty table simply produces empty results
    for (i in seq_len(RowCount)) {
      MotifCells <- SubstrateSet[i, 4:18]
      # "XXXXX" cells are placeholders for positions with no residue
      MotifCells <- MotifCells[MotifCells != "XXXXX"]
      Residues <- unlist(strsplit(paste(MotifCells, collapse = ""), split = ""))

      # The marker's position tells how many letters precede it; the letter
      # right after the marker is the central residue, hence the extra -1.
      MarkerPosition <- match(x = "x", table = Residues)
      LettersToTheLeft <- MarkerPosition - 1
      LettersToTheRight <- length(Residues) - MarkerPosition - 1

      IsFullMotif <- LettersToTheLeft > 6 && LettersToTheRight > 6
      if ((IsFullMotif && KeepFull) || (!IsFullMotif && KeepTruncated)) {
        # A full motif needs no padding; max() also keeps rep() from failing
        # when one side is longer than 7 letters.
        LeftSpaces <- rep(" ", times = max(0, 7 - LettersToTheLeft))
        RightSpaces <- rep(" ", times = max(0, 7 - LettersToTheRight))
        Motif <- c(LeftSpaces, Residues, RightSpaces)
        # drop the marker again before saving the motif
        Motif <- Motif[!Motif %in% "x"]
        Motifs[i, 1] <- paste(Motif, collapse = "")
        # the accession number comes from the SAME table as the motif
        AccessionNumbers[i, 1] <- SubstrateSet[i, 3]
      }
    }

    list(Motifs = Motifs, AccessionNumbers = AccessionNumbers)
  }

  # Full only / truncated only / everything. When both flags are "YES" the
  # truncated-only selection wins, as it did when the truncated pass simply
  # overwrote the results of the full pass.
  KeepFullMotifs <- TruncatedMotifsOnly_questionmark != "YES"
  KeepTruncatedMotifs <- TruncatedMotifsOnly_questionmark == "YES" ||
    FullMotifsOnly_questionmark != "YES"

  FirstMotifSet <- BuildMotifSet(FirstSubstrateSet, KeepFullMotifs, KeepTruncatedMotifs)
  FTLwtmotifs <- FirstMotifSet$Motifs
  FTLwtAccessionNumbers <- FirstMotifSet$AccessionNumbers

  SecondMotifSet <- BuildMotifSet(SecondSubstrateSet, KeepFullMotifs, KeepTruncatedMotifs)
  D835Ymotifs <- SecondMotifSet$Motifs
  D835YAccessionNumbers <- SecondMotifSet$AccessionNumbers

  ThirdMotifSet <- BuildMotifSet(ThirdSubstrateSet, KeepFullMotifs, KeepTruncatedMotifs)
  ITDmotifs <- ThirdMotifSet$Motifs
  ITDAccessionNumbers <- ThirdMotifSet$AccessionNumbers

  #############################################################################
  #now look for the commonality

  # Motifs found in both the third and the second set, in the order they first
  # appear in the third set. Duplicates are dropped here because the result
  # rows are de-duplicated before writing anyway.
  ThirdSetMotifs <- ITDmotifs[!is.na(ITDmotifs)]
  FirstOverlapmotifs <- unique(ThirdSetMotifs[ThirdSetMotifs %in% D835Ymotifs])

  # For every overlapping motif that is also in the first set, collect the
  # substrate row and the background-frequency columns for its accession number.
  SharedSubstrateRows <- list()
  SharedFrequencyRows <- list()
  FirstSetMotifs <- as.vector(FTLwtmotifs)

  for (OverlapMotif in FirstOverlapmotifs) {
    # which() skips the NA entries of rows whose motif was not recorded
    for (k in which(FirstSetMotifs == OverlapMotif)) {
      #when you find a match between the venn diagrams, save the substrate info
      SharedSubstrateRows[[length(SharedSubstrateRows) + 1]] <-
        as.matrix(FirstSubstrateSet[k, 1:20])

      #then take the accession number
      AccessionNumber <- as.character(FirstSubstrateSet[k, 3])

      for (m in seq_len(ncol(Firstsubbackfreq))) {
        # row 1 of each background-frequency column is compared against the
        # substrate's accession number (fixed-string containment)
        AN <- as.character(Firstsubbackfreq[1, m])
        if (grepl(pattern = AN, x = AccessionNumber, fixed = TRUE)) {
          #with that accession number, find a match in the subbackfreq file and save it here
          SharedFrequencyRows[[length(SharedFrequencyRows) + 1]] <-
            matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
        }
      }
    }
  }

  # Row labels for the shared background-frequency table
  columnalheader <- matrix(as.character(Thirdsubbackfreq[1:36, 1]), nrow = 1)

  # drop = FALSE keeps one-row results as matrices so they are still written as
  # a row; the empty case gets a zero-row matrix so only the header is written.
  if (length(SharedSubstrateRows) > 0) {
    FinalMotifsMatrix <- do.call(rbind, SharedSubstrateRows)
    TrueFinalMotifsMatrix <-
      FinalMotifsMatrix[!duplicated(FinalMotifsMatrix), , drop = FALSE]
  } else {
    FinalMotifsMatrix <- matrix(
      character(0), nrow = 0, ncol = 20,
      dimnames = list(NULL, colnames(FirstSubstrateSet)[1:20])
    )
    TrueFinalMotifsMatrix <- FinalMotifsMatrix
  }

  if (length(SharedFrequencyRows) > 0) {
    FinalMatrix <- do.call(rbind, SharedFrequencyRows)
    TrueMatrix <- FinalMatrix[!duplicated(FinalMatrix), , drop = FALSE]
  } else {
    FinalMatrix <- matrix(character(0), nrow = 0, ncol = ncol(columnalheader))
    TrueMatrix <- FinalMatrix
  }

  write.table(
    x = TrueFinalMotifsMatrix,
    file = Shared_motifs_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    na = "",
    append = FALSE
  )

  # Label row first, then transpose so each shared accession number becomes one
  # column of the output file.
  TrueMatrix <- rbind(columnalheader, TrueMatrix)
  TrueMatrix <- t(TrueMatrix)

  # NOTE(review): append = TRUE adds to an existing file of the same name
  # instead of replacing it - confirm this is intended.
  write.table(
    x = TrueMatrix,
    file = Shared_subbackfreq_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    na = "",
    append = TRUE
  )
}
| 568 | |
| 569 if (Are_You_Looking_For_Commonality=="NO"){ | |
| 570 if (FullMotifsOnly_questionmark=="YES"){ | |
| 571 FTLwtmotifs=rep(NA,times=nrow(FirstSubstrateSet)) | |
| 572 FTLwtAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) | |
| 573 leftspaces<-c() | |
| 574 rightspaces<-c() | |
| 575 for (i in 1:nrow(FirstSubstrateSet)){ | |
| 576 FTLwtletters<-FirstSubstrateSet[i,4:18] | |
| 577 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
| 578 FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
| 579 | |
| 580 | |
| 581 YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
| 582 YYYposition <- match(x = "x", table = YYYmotif) | |
| 583 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are | |
| 584 #just 3 letters to the left of x | |
| 585 | |
| 586 YYYLettersToTheLeft <- YYYposition - 1 | |
| 587 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 588 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 589 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 590 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
| 591 #variable the user puts in is | |
| 592 | |
| 593 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
| 594 motif<-YYYmotif | |
| 595 #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
| 596 motif<-motif[!motif %in% "x"] | |
| 597 motif<-paste(motif, sep="", collapse="") | |
| 598 FTLwtletters<-motif | |
| 599 FTLwtmotifs[i]<-FTLwtletters | |
| 600 FTLwtAccessionNumbers[i]<-FirstSubstrateSet[i,3] | |
| 601 } | |
| 602 | |
| 603 } | |
| 604 # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)] | |
| 605 # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1) | |
| 606 # | |
| 607 #full-motif mode, second substrate set: collect every motif of SecondSubstrateSet that has 7 letters on both sides of the Y | |
| 608 D835Ymotifs=rep(NA,times=nrow(FirstSubstrateSet)) | |
| 609 D835YAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) | |
| 610 #NOTE(review): both vectors are sized from FirstSubstrateSet although the loop walks SecondSubstrateSet; R extends them on assignment | |
| 611 for (i in seq_len(nrow(SecondSubstrateSet))){ | |
| 612 D835letters<-SecondSubstrateSet[i,4:18] | |
| 613 D835letters<-D835letters[D835letters !="XXXXX"] | |
| 614 D835letters<-paste(D835letters, sep="", collapse="") | |
| 615 #split the row into single characters; the lowercase "x" is the marker that sits immediately before the Y | |
| 616 #(match() returns NA when a row has no "x" at all) | |
| 617 YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
| 618 YYYposition <- match(x = "x", table = YYYmotif) | |
| 619 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 620 #just 3 letters to the left of x | |
| 621 | |
| 622 YYYLettersToTheLeft <- YYYposition - 1 | |
| 623 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 624 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 625 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 626 #full motifs only: keep the row when there are 7 letters on each side of the Y (the 7 is hard-coded here); | |
| 627 #rows with no "x" marker (NA position) are skipped instead of stopping the script | |
| 628 | |
| 629 if (!is.na(YYYposition) && YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
| 630 motif<-YYYmotif | |
| 631 #a full motif needs no blank padding, so leftspaces/rightspaces (never assigned in this loop) | |
| 632 #are deliberately not used here | |
| 633 #drop the "x" marker and collapse back into one string: the Y and +/- 7 amino acids | |
| 634 motif<-motif[!motif %in% "x"] | |
| 635 motif<-paste(motif, sep="", collapse="") | |
| 636 D835letters<-motif | |
| 637 D835Ymotifs[i]<-D835letters | |
| 638 D835YAccessionNumbers[i]<-SecondSubstrateSet[i,3] | |
| 639 } | |
| 640 } | |
| 641 #full-motif mode, third substrate set: collect every motif of ThirdSubstrateSet that has 7 letters on both sides of the Y | |
| 642 ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet)) | |
| 643 ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet)) | |
| 644 #NOTE(review): both vectors are sized from FirstSubstrateSet although the loop walks ThirdSubstrateSet; R extends them on assignment | |
| 645 for (i in seq_len(nrow(ThirdSubstrateSet))){ | |
| 646 ITDletters<-ThirdSubstrateSet[i,4:18] | |
| 647 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
| 648 ITDletters<-paste(ITDletters, sep="", collapse="") | |
| 649 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
| 650 YYYposition <- match(x = "x", table = YYYmotif) | |
| 651 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 652 #just 3 letters to the left of x | |
| 653 | |
| 654 YYYLettersToTheLeft <- YYYposition - 1 | |
| 655 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 656 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 657 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 658 #full motifs only: keep the row when there are 7 letters on each side of the Y (the 7 is hard-coded here); | |
| 659 #rows with no "x" marker (match() gives NA) are skipped instead of stopping the script | |
| 660 | |
| 661 if (!is.na(YYYposition) && YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
| 662 motif<-YYYmotif | |
| 663 #a full motif needs no blank padding, so leftspaces/rightspaces (never assigned in this loop) | |
| 664 #are deliberately not used here | |
| 665 #drop the "x" marker and collapse back into one string: the Y and +/- 7 amino acids | |
| 666 motif<-motif[!motif %in% "x"] | |
| 667 motif<-paste(motif, sep="", collapse="") | |
| 668 ITDletters<-motif | |
| 669 ITDmotifs[i]<-ITDletters | |
| 670 ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3] | |
| 671 | |
| 672 } | |
| 673 } | |
| 674 names(ITDmotifs)<-ITDAccessionNumbers | |
| 675 names(D835Ymotifs)<-D835YAccessionNumbers | |
| 676 names(FTLwtmotifs)<-FTLwtAccessionNumbers | |
| 677 } | |
| 678 | |
| 679 | |
| 680 ############################################### | |
| 681 #Truncated only: for each of the three substrate sets keep just the rows with fewer than 7 letters on at least one side of the Y | |
| 682 if (TruncatedMotifsOnly_questionmark=="YES"){ | |
| 683 FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
| 684 FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
| 685 #rows that are not truncated keep their NA placeholder in both matrices | |
| 686 for (i in seq_len(nrow(FirstSubstrateSet))){ | |
| 687 FTLwtletters<-FirstSubstrateSet[i,4:18] | |
| 688 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
| 689 FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
| 690 | |
| 691 | |
| 692 YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
| 693 YYYposition <- match(x = "x", table = YYYmotif) | |
| 694 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 695 #just 3 letters to the left of x | |
| 696 | |
| 697 YYYLettersToTheLeft <- YYYposition - 1 | |
| 698 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 699 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 700 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 701 #truncated motifs only: keep the row when fewer than 7 letters sit on either side of the Y (the 7 is hard-coded); | |
| 702 #rows with no "x" marker (match() gives NA) are skipped instead of stopping the script | |
| 703 | |
| 704 if (!is.na(YYYposition) && (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7)) { | |
| 705 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 706 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 707 #pad with blanks so that every motif has 7 slots on each side of the Y | |
| 708 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 709 #drop the "x" marker and collapse into one string: the Y and +/- 7 positions, blanks where truncated | |
| 710 motif<-motif[!motif %in% "x"] | |
| 711 motif<-paste(motif, sep="", collapse="") | |
| 712 FTLwtletters<-motif | |
| 713 FTLwtmotifs[i,1]<-FTLwtletters | |
| 714 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
| 715 } | |
| 716 | |
| 717 } | |
| 718 | |
| 719 D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) | |
| 720 D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
| 721 #(a leftover debugging assignment "i=2" was removed here; the loop below sets i itself) | |
| 722 for (i in seq_len(nrow(SecondSubstrateSet))){ | |
| 723 D835letters<-SecondSubstrateSet[i,4:18] | |
| 724 D835letters<-D835letters[D835letters !="XXXXX"] | |
| 725 D835letters<-paste(D835letters, sep="", collapse="") | |
| 726 | |
| 727 | |
| 728 YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
| 729 YYYposition <- match(x = "x", table = YYYmotif) | |
| 730 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 731 #just 3 letters to the left of x | |
| 732 | |
| 733 YYYLettersToTheLeft <- YYYposition - 1 | |
| 734 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 735 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 736 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 737 #truncated motifs only: keep the row when fewer than 7 letters sit on either side of the Y (the 7 is hard-coded); | |
| 738 #rows with no "x" marker (match() gives NA) are skipped instead of stopping the script | |
| 739 | |
| 740 if (!is.na(YYYposition) && (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7)) { | |
| 741 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 742 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 743 #pad with blanks so that every motif has 7 slots on each side of the Y | |
| 744 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 745 #drop the "x" marker and collapse into one string: the Y and +/- 7 positions, blanks where truncated | |
| 746 motif<-motif[!motif %in% "x"] | |
| 747 motif<-paste(motif, sep="", collapse="") | |
| 748 D835letters<-motif | |
| 749 D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
| 750 D835Ymotifs[i,1]<-D835letters | |
| 751 } | |
| 752 } | |
| 753 | |
| 754 ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) | |
| 755 ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) | |
| 756 | |
| 757 for (i in seq_len(nrow(ThirdSubstrateSet))){ | |
| 758 ITDletters<-ThirdSubstrateSet[i,4:18] | |
| 759 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
| 760 ITDletters<-paste(ITDletters, sep="", collapse="") | |
| 761 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
| 762 YYYposition <- match(x = "x", table = YYYmotif) | |
| 763 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 764 #just 3 letters to the left of x | |
| 765 | |
| 766 YYYLettersToTheLeft <- YYYposition - 1 | |
| 767 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 768 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 769 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 770 #truncated motifs only: keep the row when fewer than 7 letters sit on either side of the Y (the 7 is hard-coded); | |
| 771 #rows with no "x" marker (match() gives NA) are skipped instead of stopping the script | |
| 772 | |
| 773 if (!is.na(YYYposition) && (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7)) { | |
| 774 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 775 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 776 #pad with blanks so that every motif has 7 slots on each side of the Y | |
| 777 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 778 #drop the "x" marker and collapse into one string: the Y and +/- 7 positions, blanks where truncated | |
| 779 motif<-motif[!motif %in% "x"] | |
| 780 motif<-paste(motif, sep="", collapse="") | |
| 781 ITDletters<-motif | |
| 782 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
| 783 ITDmotifs[i,1]<-ITDletters | |
| 784 } | |
| 785 } | |
| 786 names(FTLwtmotifs)<-FTLwtAccessionNumbers | |
| 787 names(D835Ymotifs)<-D835YAccessionNumbers | |
| 788 names(ITDmotifs)<-ITDAccessionNumbers | |
| 789 } | |
| 790 | |
| 791 ############################################### | |
| 792 #ALL motifs, full and truncated: runs when neither "only" flag is "YES"; every row with an "x" marker yields a motif, | |
| 793 #padded with blanks when truncated, and each motif is named after the accession number (column 3) of ITS OWN substrate set | |
| 794 if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ | |
| 795 FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
| 796 FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
| 797 | |
| 798 for (i in seq_len(nrow(FirstSubstrateSet))){ | |
| 799 FTLwtletters<-FirstSubstrateSet[i,4:18] | |
| 800 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
| 801 FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
| 802 leftspaces<-c() | |
| 803 rightspaces<-c() | |
| 804 | |
| 805 YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
| 806 YYYposition <- match(x = "x", table = YYYmotif) | |
| 807 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 808 #just 3 letters to the left of x | |
| 809 | |
| 810 YYYLettersToTheLeft <- YYYposition - 1 | |
| 811 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 812 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 813 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 814 #truncated case: fewer than 7 letters on either side of the Y (the 7 is hard-coded); | |
| 815 #rows with no "x" marker (match() gives NA) are skipped instead of stopping the script | |
| 816 | |
| 817 | |
| 818 if (!is.na(YYYposition) && (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7)) { | |
| 819 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 820 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 821 #pad with blanks so that every motif has 7 slots on each side of the Y | |
| 822 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 823 #drop the "x" marker and collapse into one string: the Y and +/- 7 positions, blanks where truncated | |
| 824 motif<-motif[!motif %in% "x"] | |
| 825 motif<-paste(motif, sep="", collapse="") | |
| 826 FTLwtletters<-motif | |
| 827 FTLwtmotifs[i,1]<-FTLwtletters | |
| 828 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
| 829 } | |
| 830 | |
| 831 if (!is.na(YYYposition) && YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
| 832 motif<-YYYmotif | |
| 833 #full case: leftspaces/rightspaces are still empty here (reset at the top of the loop), so nothing is padded | |
| 834 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 835 #drop the "x" marker and collapse into one string: the Y and +/- 7 amino acids | |
| 836 motif<-motif[!motif %in% "x"] | |
| 837 motif<-paste(motif, sep="", collapse="") | |
| 838 FTLwtletters<-motif | |
| 839 FTLwtmotifs[i,1]<-FTLwtletters | |
| 840 FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
| 841 | |
| 842 | |
| 843 } | |
| 844 | |
| 845 } | |
| 846 | |
| 847 D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) | |
| 848 D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
| 849 | |
| 850 for (i in seq_len(nrow(SecondSubstrateSet))){ | |
| 851 D835letters<-SecondSubstrateSet[i,4:18] | |
| 852 D835letters<-D835letters[D835letters !="XXXXX"] | |
| 853 D835letters<-paste(D835letters, sep="", collapse="") | |
| 854 leftspaces<-c() | |
| 855 rightspaces<-c() | |
| 856 | |
| 857 YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
| 858 YYYposition <- match(x = "x", table = YYYmotif) | |
| 859 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 860 #just 3 letters to the left of x | |
| 861 | |
| 862 YYYLettersToTheLeft <- YYYposition - 1 | |
| 863 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 864 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 865 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 866 #truncated case: fewer than 7 letters on either side of the Y (the 7 is hard-coded); | |
| 867 #rows with no "x" marker (match() gives NA) are skipped instead of stopping the script | |
| 868 if (!is.na(YYYposition) && (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7)) { | |
| 869 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 870 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 871 #pad with blanks so that every motif has 7 slots on each side of the Y | |
| 872 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 873 #drop the "x" marker and collapse into one string: the Y and +/- 7 positions, blanks where truncated | |
| 874 motif<-motif[!motif %in% "x"] | |
| 875 motif<-paste(motif, sep="", collapse="") | |
| 876 D835letters<-motif | |
| 877 D835Ymotifs[i,1]<-D835letters | |
| 878 D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
| 879 } | |
| 880 | |
| 881 if (!is.na(YYYposition) && YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
| 882 motif<-YYYmotif | |
| 883 #full case: leftspaces/rightspaces are still empty here (reset at the top of the loop), so nothing is padded | |
| 884 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 885 #drop the "x" marker and collapse into one string: the Y and +/- 7 amino acids | |
| 886 motif<-motif[!motif %in% "x"] | |
| 887 motif<-paste(motif, sep="", collapse="") | |
| 888 D835letters<-motif | |
| 889 D835Ymotifs[i,1]<-D835letters | |
| 890 D835YAccessionNumbers[i,1]<-SecondSubstrateSet[i,3] | |
| 891 } | |
| 892 } | |
| 893 | |
| 894 | |
| 895 ITDmotifs=matrix(,nrow = nrow(ThirdSubstrateSet),ncol=1) | |
| 896 ITDAccessionNumbers<-matrix(,nrow = nrow(ThirdSubstrateSet)) | |
| 897 | |
| 898 for (i in seq_len(nrow(ThirdSubstrateSet))){ | |
| 899 ITDletters<-ThirdSubstrateSet[i,4:18] | |
| 900 ITDletters<-ITDletters[ITDletters !="XXXXX"] | |
| 901 ITDletters<-paste(ITDletters, sep="", collapse="") | |
| 902 YYYmotif <- unlist(strsplit(ITDletters, split = "")) | |
| 903 leftspaces<-c() | |
| 904 rightspaces<-c() | |
| 905 YYYposition <- match(x = "x", table = YYYmotif) | |
| 906 #the position of x tells how many letters are to its left: x at position 4 means there are | |
| 907 #just 3 letters to the left of x | |
| 908 | |
| 909 YYYLettersToTheLeft <- YYYposition - 1 | |
| 910 #letters to the right of Y = length(motif)-position-1: if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 911 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 912 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 913 #truncated case: fewer than 7 letters on either side of the Y (the 7 is hard-coded); | |
| 914 #rows with no "x" marker (match() gives NA) are skipped instead of stopping the script | |
| 915 if (!is.na(YYYposition) && (YYYLettersToTheLeft < 7 || YYYLettersToTheRight < 7)) { | |
| 916 leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 917 rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 918 #pad with blanks so that every motif has 7 slots on each side of the Y | |
| 919 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 920 #drop the "x" marker and collapse into one string: the Y and +/- 7 positions, blanks where truncated | |
| 921 motif<-motif[!motif %in% "x"] | |
| 922 motif<-paste(motif, sep="", collapse="") | |
| 923 ITDletters<-motif | |
| 924 ITDmotifs[i,1]<-ITDletters | |
| 925 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
| 926 } | |
| 927 | |
| 928 if (!is.na(YYYposition) && YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) { | |
| 929 motif<-YYYmotif | |
| 930 #full case: leftspaces/rightspaces are still empty here (reset at the top of the loop), so nothing is padded | |
| 931 motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 932 #drop the "x" marker and collapse into one string: the Y and +/- 7 amino acids | |
| 933 motif<-motif[!motif %in% "x"] | |
| 934 motif<-paste(motif, sep="", collapse="") | |
| 935 ITDletters<-motif | |
| 936 ITDmotifs[i,1]<-ITDletters | |
| 937 ITDAccessionNumbers[i,1]<-ThirdSubstrateSet[i,3] | |
| 938 } | |
| 939 } | |
| 940 names(FTLwtmotifs)<-FTLwtAccessionNumbers | |
| 941 names(D835Ymotifs)<-D835YAccessionNumbers | |
| 942 names(ITDmotifs)<-ITDAccessionNumbers | |
| 943 } | |
| 944 #keep, for each substrate set, only the motifs that occur in neither of the other two sets, then drop repeated motifs | |
| 945 #c(NA, ...) makes %in% also match the NA placeholders left by skipped rows, so they never reach the output files | |
| 946 FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% c(NA,D835Ymotifs)] | |
| 947 FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!FTLwtmotifsFINAL %in% ITDmotifs] | |
| 948 FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)] | |
| 949 | |
| 950 | |
| 951 ITDmotifsFINAL<-ITDmotifs[!ITDmotifs %in% c(NA,D835Ymotifs)] | |
| 952 ITDmotifsFINAL<-ITDmotifsFINAL[!ITDmotifsFINAL %in% FTLwtmotifs] | |
| 953 ITDmotifsFINAL<-ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)] | |
| 954 | |
| 955 | |
| 956 D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% c(NA,FTLwtmotifs)] | |
| 957 D835YmotifsFINAL<-D835YmotifsFINAL[!D835YmotifsFINAL %in% ITDmotifs] | |
| 958 D835YmotifsFINAL<-D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)] | |
| 959 | |
| 960 #for every motif unique to the first set, collect each Firstsubbackfreq column whose first cell occurs in that motif's name | |
| 961 columnalheader<-c(rep(NA,36)) | |
| 962 FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
| 963 #row 1 is an all-NA placeholder that gives rbind() a 36-column matrix to grow | |
| 964 for (k in seq_along(FTLwtmotifsFINAL)) { | |
| 965 AN<-00000 | |
| 966 #reset the accession number before scanning, so that a value left over from the previous motif | |
| 967 #is never reused | |
| 968 for (m in seq_len(ncol(Firstsubbackfreq))) { | |
| 969 AN <- as.character(Firstsubbackfreq[1, m]) | |
| 970 if (isTRUE(grepl(pattern = AN, | |
| 971 x = names(FTLwtmotifsFINAL[k]), | |
| 972 fixed = TRUE))) { | |
| 973 outputmatrix <- as.character(Firstsubbackfreq[, m]) | |
| 974 outputmatrix <- matrix(outputmatrix, nrow = 1) | |
| 975 #the accession number matched: append that whole column as one row of the result | |
| 976 FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix) | |
| 977 } | |
| 978 } | |
| 979 } | |
| 980 FTLFinalMatrix<-FTLFinalMatrix[!duplicated(FTLFinalMatrix), ,drop = FALSE] | |
| 981 #for every motif unique to the third set, collect each Thirdsubbackfreq column whose first cell occurs in that motif's name | |
| 982 columnalheader<-c(rep(NA,36)) | |
| 983 ITDFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
| 984 #row 1 is an all-NA placeholder that gives rbind() a 36-column matrix to grow | |
| 985 for (k in seq_along(ITDmotifsFINAL)) { | |
| 986 AN<-00000 | |
| 987 #reset the accession number before scanning, so that a value left over from the previous motif | |
| 988 #is never reused | |
| 989 for (m in seq_len(ncol(Thirdsubbackfreq))) { | |
| 990 AN <- as.character(Thirdsubbackfreq[1, m]) | |
| 991 if (isTRUE(grepl(pattern = AN, | |
| 992 x = names(ITDmotifsFINAL[k]), | |
| 993 fixed = TRUE))) { | |
| 994 outputmatrix <- as.character(Thirdsubbackfreq[, m]) | |
| 995 outputmatrix <- matrix(outputmatrix, nrow = 1) | |
| 996 #the accession number matched: append that whole column as one row of the result | |
| 997 ITDFinalMatrix<-rbind(ITDFinalMatrix,outputmatrix) | |
| 998 } | |
| 999 } | |
| 1000 } | |
| 1001 ITDFinalMatrix<-ITDFinalMatrix[!duplicated(ITDFinalMatrix), ,drop = FALSE] | |
| 1002 #for every motif unique to the second set, collect each Secondsubbackfreq column whose first cell occurs in that motif's name | |
| 1003 columnalheader<-c(rep(NA,36)) | |
| 1004 D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
| 1005 #row 1 is an all-NA placeholder that gives rbind() a 36-column matrix to grow | |
| 1006 for (k in seq_along(D835YmotifsFINAL)) { | |
| 1007 #scan every column of Secondsubbackfreq; AN is overwritten with that column's first cell | |
| 1008 #(the accession number) on each pass | |
| 1009 for (m in seq_len(ncol(Secondsubbackfreq))) { | |
| 1010 AN <- as.character(Secondsubbackfreq[1, m]) | |
| 1011 if (isTRUE(grepl(pattern = AN, | |
| 1012 x = names(D835YmotifsFINAL[k]), | |
| 1013 fixed = TRUE))) { | |
| 1014 outputmatrix <- as.character(Secondsubbackfreq[, m]) | |
| 1015 outputmatrix <- matrix(outputmatrix, nrow = 1) | |
| 1016 #the accession number matched: append that whole column as one row of the result | |
| 1017 D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix) | |
| 1018 } | |
| 1019 } | |
| 1020 } | |
| 1021 D835YFinalMatrix<-D835YFinalMatrix[!duplicated(D835YFinalMatrix), ,drop = FALSE] | |
| 1022 #write the motifs unique to the first set, then the substrate-background-frequency rows collected for them | |
| 1023 FTLoutputmatrix<-matrix(data=c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)),ncol = 2) | |
| 1024 #column 1 = motif string, column 2 = the motif's name (its accession number) | |
| 1025 #build the output table one row per motif | |
| 1026 FLTreference<-FTLoutputmatrix[,2] | |
| 1027 #header = first 23 column names of FirstSubstrateSet, held as a 1-row matrix so it is still written as a row when no motif follows | |
| 1028 FirstLine<-colnames(FirstSubstrateSet) | |
| 1029 FirstLine<-matrix(FirstLine[1:23],nrow = 1) | |
| 1030 for (q in seq_len(nrow(FTLoutputmatrix))) { | |
| 1031 thismotif<-unlist(strsplit(FTLoutputmatrix[q,1],"")) | |
| 1032 thisoutput<-c("","",FTLoutputmatrix[q,2],thismotif,"","","","","") | |
| 1033 FirstLine<-rbind(FirstLine,thisoutput) | |
| 1034 } | |
| 1035 | |
| 1036 | |
| 1037 | |
| 1038 write.table(x=FirstLine, | |
| 1039 file=First_unshared_motifs_table, | |
| 1040 quote=FALSE, sep=",", | |
| 1041 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1042 #NOTE(review): the label row is taken from column 1 of Thirdsubbackfreq, not Firstsubbackfreq; presumably all three files share these 36 labels - confirm | |
| 1043 columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) | |
| 1044 columnalheader<-matrix(columnalheader,nrow = 1) | |
| 1045 | |
| 1046 # columnalheader<-rbind(columnalheader,FTLFinalMatrix) | |
| 1047 | |
| 1048 write.table(x=columnalheader, | |
| 1049 file=First_unshared_subbackfreq, | |
| 1050 quote=FALSE, sep=",", | |
| 1051 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1052 #skip row 1 (the all-NA placeholder); drop = FALSE keeps a single remaining row as a row, and an empty result writes nothing | |
| 1053 write.table(x=matrix(FTLFinalMatrix,ncol = 36)[-1, ,drop = FALSE], | |
| 1054 file=First_unshared_subbackfreq, | |
| 1055 quote=FALSE, sep=",", | |
| 1056 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1057 | |
| 1058 ############################################################################################################ | |
| 1059 #write the motifs unique to the second set, then the substrate-background-frequency rows collected for them | |
| 1060 D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2) | |
| 1061 #column 1 = motif string, column 2 = the motif's name (its accession number) | |
| 1062 FLTreference<-D835Youtputmatrix[,2] | |
| 1063 #header = first 23 column names of FirstSubstrateSet, held as a 1-row matrix so it is still written as a row when no motif follows | |
| 1064 FirstLine<-colnames(FirstSubstrateSet) | |
| 1065 FirstLine<-matrix(FirstLine[1:23],nrow = 1) | |
| 1066 for (q in seq_len(nrow(D835Youtputmatrix))) { | |
| 1067 thismotif<-unlist(strsplit(D835Youtputmatrix[q,1],"")) | |
| 1068 thisoutput<-c("","",D835Youtputmatrix[q,2],thismotif,"","","","","") | |
| 1069 FirstLine<-rbind(FirstLine,thisoutput) | |
| 1070 } | |
| 1071 | |
| 1072 | |
| 1073 | |
| 1074 write.table(x=FirstLine, | |
| 1075 file=Second_unshared_motifs_table, | |
| 1076 quote=FALSE, sep=",", | |
| 1077 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1078 #NOTE(review): the label row is taken from column 1 of Thirdsubbackfreq, not Secondsubbackfreq; presumably all three files share these 36 labels - confirm | |
| 1079 columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) | |
| 1080 columnalheader<-matrix(columnalheader,nrow = 1) | |
| 1081 | |
| 1082 # columnalheader<-rbind(columnalheader,D835YFinalMatrix) | |
| 1083 | |
| 1084 write.table(x=columnalheader, | |
| 1085 file=Second_unshared_subbackfreq, | |
| 1086 quote=FALSE, sep=",", | |
| 1087 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1088 #skip row 1 (the all-NA placeholder); drop = FALSE keeps a single remaining row as a row, and an empty result writes nothing | |
| 1089 write.table(x=matrix(D835YFinalMatrix,ncol = 36)[-1, ,drop = FALSE], | |
| 1090 file=Second_unshared_subbackfreq, | |
| 1091 quote=FALSE, sep=",", | |
| 1092 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1093 | |
| 1094 ############################################################################################################ | |
| 1095 #write the motifs unique to the third set, then the substrate-background-frequency rows collected for them | |
| 1096 ITDoutputmatrix<-matrix(data = c(ITDmotifsFINAL,names(ITDmotifsFINAL)),ncol = 2) | |
| 1097 #column 1 = motif string, column 2 = the motif's name (its accession number) | |
| 1098 FLTreference<-ITDoutputmatrix[,2] | |
| 1099 #header = first 23 column names of FirstSubstrateSet, held as a 1-row matrix so it is still written as a row when no motif follows | |
| 1100 FirstLine<-colnames(FirstSubstrateSet) | |
| 1101 FirstLine<-matrix(FirstLine[1:23],nrow = 1) | |
| 1102 for (q in seq_len(nrow(ITDoutputmatrix))) { | |
| 1103 thismotif<-unlist(strsplit(ITDoutputmatrix[q,1],"")) | |
| 1104 thisoutput<-c("","",ITDoutputmatrix[q,2],thismotif,"","","","","") | |
| 1105 FirstLine<-rbind(FirstLine,thisoutput) | |
| 1106 } | |
| 1107 | |
| 1108 | |
| 1109 write.table(x=FirstLine, | |
| 1110 file=Third_unshared_motifs_table, | |
| 1111 quote=FALSE, sep=",", | |
| 1112 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1113 #the label row is column 1 (first 36 cells) of Thirdsubbackfreq, written as a single row | |
| 1114 columnalheader<-c(as.character(Thirdsubbackfreq[1:36,1])) | |
| 1115 columnalheader<-matrix(columnalheader,nrow = 1) | |
| 1116 | |
| 1117 # columnalheader<-rbind(columnalheader,ITDFinalMatrix) | |
| 1118 | |
| 1119 write.table(x=columnalheader, | |
| 1120 file=Third_unshared_subbackfreq, | |
| 1121 quote=FALSE, sep=",", | |
| 1122 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1123 #skip row 1 (the all-NA placeholder); drop = FALSE keeps a single remaining row as a row, and an empty result writes nothing | |
| 1124 write.table(x=matrix(ITDFinalMatrix,ncol = 36)[-1, ,drop = FALSE], | |
| 1125 file=Third_unshared_subbackfreq, | |
| 1126 quote=FALSE, sep=",", | |
| 1127 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 1128 | |
| 1129 } |
