comparison ST CandD/test-data/CandD.R @ 0:1875d4b62014 draft default tip

Uploaded
author jfb
date Wed, 27 Jun 2018 11:36:32 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1875d4b62014
# Load the three data sets being compared. Each set is a pair of files:
# a substrate table (first line is a header row) and a
# substrate-background-frequency table (no header row). Strings are kept
# as character vectors rather than factors in every table.

# Set 1
FirstSubstrateSet <- read.csv(
  file = "input1.csv", header = TRUE, stringsAsFactors = FALSE
)
Firstsubbackfreq <- read.csv(
  file = "input2.csv", header = FALSE, stringsAsFactors = FALSE
)

# Set 2
SecondSubstrateSet <- read.csv(
  file = "input3.csv", header = TRUE, stringsAsFactors = FALSE
)
Secondsubbackfreq <- read.csv(
  file = "input4.csv", header = FALSE, stringsAsFactors = FALSE
)

# Set 3
ThirdSubstrateSet <- read.csv(
  file = "input5.csv", header = TRUE, stringsAsFactors = FALSE
)
Thirdsubbackfreq <- read.csv(
  file = "input6.csv", header = FALSE, stringsAsFactors = FALSE
)
9
10
# Command-line switches. All three are compared against the literal
# upper-case string "YES" (the third one also against "NO"):
#   1. "YES" to keep ONLY full motifs
#   2. "YES" to keep ONLY truncated motifs
#   3. "YES" to find the overlap between the sets, "NO" to find the
#      non-overlap
args <- commandArgs(trailingOnly = TRUE)

# A missing switch would become NA and only surface later as an opaque
# "missing value where TRUE/FALSE needed" error, so fail here instead.
if (length(args) < 3) {
  stop(
    "expected 3 arguments (full-motifs-only, truncated-motifs-only, ",
    "looking-for-commonality), got ", length(args),
    call. = FALSE
  )
}

# Echo the switches so they show up in the run log.
print(args[1])
print(args[2])
print(args[3])

# If you want ONLY FULL MOTIFS, put "YES" here, please use all caps
FullMotifsOnly_questionmark <- args[1]
# If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps
TruncatedMotifsOnly_questionmark <- args[2]
# If you want to find the overlap, put "YES" here (all caps); if you want
# to find the non-overlap, put "NO" (all caps)
Are_You_Looking_For_Commonality <- args[3]
24
25
# Names of the output files.

# Tables written for the shared (overlapping) motifs.
Shared_motifs_table      <- "sharedmotifs.csv"
Shared_subbackfreq_table <- "sharedSBF.csv"

# Earlier names for the two shared tables, kept for reference:
# Shared_motifs_table<-"Shared motifs 7-27-17.csv"
# Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv"

# Per-set tables; presumably written when looking for the non-overlap
# (the code using them is not next to these definitions -- verify).
First_unshared_motifs_table  <- "R1 substrates.csv"
First_unshared_subbackfreq   <- "R1 SBF.csv"

Second_unshared_motifs_table <- "R2 subs.csv"
Second_unshared_subbackfreq  <- "R2 SBf.csv"

Third_unshared_motifs_table  <- "R3 subs.csv"
Third_unshared_subbackfreq   <- "R3 SBF.csv"
41
42 #final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around
43 #other languages to see if any of them can do it.
44 ####################################################################################################################################
45
46
47
48
49
# Prefix every value in the central column of a substrate table with a
# lowercase "x".
#
# The motif-building code further down locates the central residue of a row
# by searching for the first "x" in the row's concatenated characters, so
# this marker has to be in place before any motif is extracted.
#
# substrate_set:  data frame of substrates.
# central_column: index of the column to mark (column 11 by default).
#
# Returns the data frame with the marked column.
mark_central_residue <- function(substrate_set, central_column = 11L) {
  # Build the marker with one element per row so that a zero-row table
  # yields a zero-length result instead of a single stray "x".
  marker <- rep("x", times = nrow(substrate_set))
  substrate_set[, central_column] <-
    paste0(marker, substrate_set[, central_column])
  substrate_set
}

FirstSubstrateSet <- mark_central_residue(FirstSubstrateSet)
SecondSubstrateSet <- mark_central_residue(SecondSubstrateSet)
ThirdSubstrateSet <- mark_central_residue(ThirdSubstrateSet)
70
71
72
73
74
75
76
77
78
79
80 ####################################################################################################################################
81 ####################################################################################################################################
82 # better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two
83 # separate proteins thus two separate accession numbers?
84 # It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second
85 # accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number
86 ####################################################################################################################################
87 ####################################################################################################################################
88 ####################################################################################################################################
89 ####################################################################################################################################
90
# Create the motif sets (full only, truncated only, or both, depending on the
# switches) and, when looking for commonality, write out the motifs shared by
# the three substrate sets together with their background frequencies.

# Turn the split characters of one substrate row into a motif string.
#
# residues: character vector of single characters; the first lowercase "x"
#           marks the slot immediately before the central residue.
# mode:     "full" keeps only rows with at least `flank` characters on both
#           sides of the central residue, "truncated" keeps only the other
#           rows, "all" keeps both kinds.
# flank:    number of characters expected on each side of the central
#           residue.
#
# Returns the motif with every "x" removed (short sides are padded with
# spaces up to `flank`), or NA_character_ when `mode` excludes the row.
format_motif <- function(residues, mode, flank = 7L) {
  marker_at <- match("x", residues)
  if (is.na(marker_at)) {
    stop("substrate row has no 'x' marker before its central residue",
         call. = FALSE)
  }

  # Everything before the marker is to the left of the central residue.
  n_left <- marker_at - 1
  # The character right after the marker is the central residue itself, so
  # it is not counted as a right-hand neighbour.
  n_right <- length(residues) - marker_at - 1

  is_full <- n_left >= flank && n_right >= flank
  if ((mode == "full" && !is_full) || (mode == "truncated" && is_full)) {
    return(NA_character_)
  }

  if (!is_full) {
    # Pad the short side(s) with blanks so the central residue lines up.
    residues <- c(
      rep(" ", times = flank - n_left),
      residues,
      rep(" ", times = flank - n_right)
    )
  }
  paste(residues[!residues %in% "x"], collapse = "")
}

# Extract one motif per row of a substrate table.
#
# substrate_set: data frame whose columns 4:18 hold the motif cells (cells
#                equal to "XXXXX" are skipped) and whose column 3 is stored
#                as the row's accession number.
# mode:          passed on to format_motif().
#
# Returns a list with two one-column matrices, `motifs` and `accessions`,
# each with one row per row of `substrate_set`; rows excluded by `mode`
# stay NA in both.
collect_motifs <- function(substrate_set, mode) {
  n_rows <- nrow(substrate_set)
  motifs <- matrix(NA, nrow = n_rows, ncol = 1)
  accessions <- matrix(NA, nrow = n_rows, ncol = 1)

  for (row_index in seq_len(n_rows)) {
    cells <- substrate_set[row_index, 4:18]
    cells <- cells[cells != "XXXXX"]
    residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

    motif <- format_motif(residues, mode)
    if (!is.na(motif)) {
      motifs[row_index, 1] <- motif
      # Keep the accession number from the same table as the motif.
      accessions[row_index, 1] <- substrate_set[row_index, 3]
    }
  }

  list(motifs = motifs, accessions = accessions)
}

if (Are_You_Looking_For_Commonality == "YES") {
  want_full <- FullMotifsOnly_questionmark == "YES"
  want_truncated <- TruncatedMotifsOnly_questionmark == "YES"
  # If both switches are "YES" the truncated-only selection wins; if neither
  # is, full and truncated motifs are both kept.
  motif_mode <- if (want_truncated) {
    "truncated"
  } else if (want_full) {
    "full"
  } else {
    "all"
  }

  first_set <- collect_motifs(FirstSubstrateSet, motif_mode)
  FTLwtmotifs <- first_set$motifs
  FTLwtAccessionNumbers <- first_set$accessions

  second_set <- collect_motifs(SecondSubstrateSet, motif_mode)
  D835Ymotifs <- second_set$motifs
  D835YAccessionNumbers <- second_set$accessions

  third_set <- collect_motifs(ThirdSubstrateSet, motif_mode)
  ITDmotifs <- third_set$motifs
  ITDAccessionNumbers <- third_set$accessions

  ###########################################################################
  # Now look for the commonality.

  # Motifs present in both the third and the second set, each listed once,
  # in the order they first appear in the third set.
  third_motifs <- ITDmotifs[, 1]
  second_motifs <- D835Ymotifs[, 1]
  FirstOverlapmotifs <- unique(
    third_motifs[
      !is.na(third_motifs) &
        third_motifs %in% second_motifs[!is.na(second_motifs)]
    ]
  )

  # Accession number found in the first row of every background-frequency
  # column; looked up once instead of once per matching substrate.
  sbf_accessions <- vapply(
    seq_len(ncol(Firstsubbackfreq)),
    function(m) as.character(Firstsubbackfreq[1, m]),
    character(1)
  )

  first_motifs <- FTLwtmotifs[, 1]
  substrate_rows <- list()
  sbf_rows <- list()

  for (shared_motif in FirstOverlapmotifs) {
    # which() ignores the NA entries of rows that produced no motif.
    for (k in which(first_motifs == shared_motif)) {
      # The motif is in all three sets: keep the substrate information.
      substrate_rows[[length(substrate_rows) + 1L]] <-
        as.matrix(FirstSubstrateSet[k, 1:20])

      # Then use its accession number to pick the matching
      # background-frequency column(s).
      accession <- as.character(FirstSubstrateSet[k, 3])
      for (m in seq_along(sbf_accessions)) {
        if (!is.na(sbf_accessions[m]) &&
              grepl(pattern = sbf_accessions[m], x = accession,
                    fixed = TRUE)) {
          sbf_rows[[length(sbf_rows) + 1L]] <-
            matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
        }
      }
    }
  }

  # Both result matrices start with an all-NA seed row that is removed again
  # after de-duplication, so they exist even when nothing matched.
  FinalMatrix <- rbind(
    matrix(NA, nrow = 1, ncol = 36),
    do.call(rbind, sbf_rows)
  )
  FinalMotifsMatrix <- rbind(
    matrix(NA, nrow = 1, ncol = 20),
    do.call(rbind, substrate_rows)
  )
  colnames(FinalMotifsMatrix) <- names(FirstSubstrateSet)[1:20]

  # drop = FALSE keeps a single remaining row a one-row matrix, so it is
  # still written as one row, and lets "no match" become a zero-row matrix.
  TrueMatrix <- FinalMatrix[!duplicated(FinalMatrix), , drop = FALSE]
  TrueFinalMotifsMatrix <-
    FinalMotifsMatrix[!duplicated(FinalMotifsMatrix), , drop = FALSE]

  TrueFinalMotifsMatrix <- TrueFinalMotifsMatrix[-1, , drop = FALSE]
  TrueMatrix <- TrueMatrix[-1, , drop = FALSE]

  write.table(
    x = TrueFinalMotifsMatrix,
    file = Shared_motifs_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    na = "",
    append = FALSE
  )

  # The first column of the background-frequency table supplies the labels
  # that become the first column of the transposed output.
  columnalheader <- c(as.character(Thirdsubbackfreq[1:36, 1]))
  columnalheader <- matrix(columnalheader, nrow = 1)

  TrueMatrix <- rbind(columnalheader, TrueMatrix)
  TrueMatrix <- t(TrueMatrix)

  write.table(
    x = TrueMatrix,
    file = Shared_subbackfreq_table,
    quote = FALSE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE,
    na = "",
    append = TRUE
  )
}
568
569 if (Are_You_Looking_For_Commonality=="NO"){
570 if (FullMotifsOnly_questionmark=="YES"){
571 FTLwtmotifs=rep(NA,times=nrow(FirstSubstrateSet))
572 FTLwtAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
573 leftspaces<-c()
574 rightspaces<-c()
575 for (i in 1:nrow(FirstSubstrateSet)){
576 FTLwtletters<-FirstSubstrateSet[i,4:18]
577 FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"]
578 FTLwtletters<-paste(FTLwtletters, sep="", collapse="")
579
580
581 YYYmotif <- unlist(strsplit(FTLwtletters, split = ""))
582 YYYposition <- match(x = "x", table = YYYmotif)
583 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
584 #just 3 letters to the left of x
585
586 YYYLettersToTheLeft <- YYYposition - 1
587 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
588 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
589 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
590 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
591 #variable the user puts in is
592
593 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
594 motif<-YYYmotif
595 #save that motif, which is the Y and +/- 4 amino acids, including truncation
596 motif<-motif[!motif %in% "x"]
597 motif<-paste(motif, sep="", collapse="")
598 FTLwtletters<-motif
599 FTLwtmotifs[i]<-FTLwtletters
600 FTLwtAccessionNumbers[i]<-FirstSubstrateSet[i,3]
601 }
602
603 }
604 # FTLwtmotifs <- FTLwtmotifs[!is.na(FTLwtmotifs)]
605 # FTLwtmotifs<-matrix(FTLwtmotifs,ncol = 1)
606 #
607
608 D835Ymotifs=rep(NA,times=nrow(FirstSubstrateSet))
609 D835YAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
610
611 for (i in 1:nrow(SecondSubstrateSet)){
612 D835letters<-SecondSubstrateSet[i,4:18]
613 D835letters<-D835letters[D835letters !="XXXXX"]
614 D835letters<-paste(D835letters, sep="", collapse="")
615
616
617 YYYmotif <- unlist(strsplit(D835letters, split = ""))
618 YYYposition <- match(x = "x", table = YYYmotif)
619 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
620 #just 3 letters to the left of x
621
622 YYYLettersToTheLeft <- YYYposition - 1
623 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
624 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
625 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
626 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
627 #variable the user puts in is
628
629 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
630 motif<-YYYmotif
631 #add blank spaces if the motif has less than 4 letters to the left/right
632 motif<-c(leftspaces,YYYmotif,rightspaces)
633 #save that motif, which is the Y and +/- 4 amino acids, including truncation
634 motif<-motif[!motif %in% "x"]
635 motif<-paste(motif, sep="", collapse="")
636 D835letters<-motif
637 D835Ymotifs[i]<-D835letters
638 D835YAccessionNumbers[i]<-SecondSubstrateSet[i,3]
639 }
640 }
641
642 ITDmotifs=rep(NA,times=nrow(FirstSubstrateSet))
643 ITDAccessionNumbers=rep(NA,times=nrow(FirstSubstrateSet))
644
645 for (i in 1:nrow(ThirdSubstrateSet)){
646 ITDletters<-ThirdSubstrateSet[i,4:18]
647 ITDletters<-ITDletters[ITDletters !="XXXXX"]
648 ITDletters<-paste(ITDletters, sep="", collapse="")
649 YYYmotif <- unlist(strsplit(ITDletters, split = ""))
650 YYYposition <- match(x = "x", table = YYYmotif)
651 #position itself tells me how much is to the left of that X by what it's number is. x at position 4 tells me that there are
652 #just 3 letters to the left of x
653
654 YYYLettersToTheLeft <- YYYposition - 1
655 #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is
656 #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1
657 YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1
658 #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the
659 #variable the user puts in is
660
661 if (YYYLettersToTheLeft > 6 && YYYLettersToTheRight > 6) {
662 motif<-YYYmotif
663 #add blank spaces if the motif has less than 4 letters to the left/right
664 motif<-c(leftspaces,YYYmotif,rightspaces)
665 #save that motif, which is the Y and +/- 4 amino acids, including truncation
666 motif<-motif[!motif %in% "x"]
667 motif<-paste(motif, sep="", collapse="")
668 ITDletters<-motif
669 ITDmotifs[i]<-ITDletters
670 ITDAccessionNumbers[i]<-ThirdSubstrateSet[i,3]
671
672 }
673 }
674 names(ITDmotifs)<-ITDAccessionNumbers
675 names(D835Ymotifs)<-D835YAccessionNumbers
676 names(FTLwtmotifs)<-FTLwtAccessionNumbers
677 }
678
679
680 ##############################################3
681 #Truncated only
if (TruncatedMotifsOnly_questionmark == "YES") {
  # Build the space-padded motif of every truncated substrate in one table.
  #
  # substrate_set: data frame; column 3 is read as the accession number and
  #   columns 4:18 as the motif cells (cells equal to "XXXXX" are discarded).
  # set_label: text used in the error message to say which table failed.
  #
  # Returns a list of two one-column matrices, `motifs` and `accessions`, with
  # one row per row of substrate_set. A row is filled in only when fewer than
  # 7 letters sit on at least one side of the central residue; every other
  # row stays NA in both matrices.
  truncated_motifs_of <- function(substrate_set, set_label) {
    motifs <- matrix(NA, nrow = nrow(substrate_set), ncol = 1)
    accessions <- matrix(NA, nrow = nrow(substrate_set), ncol = 1)

    # seq_len() makes an empty table a no-op; 1:nrow() would visit rows 1 and 0
    for (i in seq_len(nrow(substrate_set))) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      # The lower-case "x" sits immediately before the central residue, so its
      # position gives the number of letters on either side.
      marker <- match(x = "x", table = residues)
      if (is.na(marker)) {
        # previously surfaced as "missing value where TRUE/FALSE needed"
        stop(
          "Row ", i, " of the ", set_label,
          " has no lower-case \"x\" marker in columns 4 to 18",
          call. = FALSE
        )
      }
      letters_left <- marker - 1
      # one less than length - marker because the central residue follows "x"
      letters_right <- length(residues) - marker - 1

      if (letters_left < 7 || letters_right < 7) {
        # pad both sides out to 7 positions, then drop the "x" marker itself
        padded <- c(
          rep(" ", times = 7 - letters_left),
          residues,
          rep(" ", times = 7 - letters_right)
        )
        padded <- padded[!padded %in% "x"]
        motifs[i, 1] <- paste(padded, collapse = "")
        accessions[i, 1] <- substrate_set[i, 3]
      }
    }

    list(motifs = motifs, accessions = accessions)
  }

  FTLwttruncated <- truncated_motifs_of(FirstSubstrateSet, "first substrate set")
  FTLwtmotifs <- FTLwttruncated$motifs
  FTLwtAccessionNumbers <- FTLwttruncated$accessions

  D835Ytruncated <- truncated_motifs_of(SecondSubstrateSet, "second substrate set")
  D835Ymotifs <- D835Ytruncated$motifs
  D835YAccessionNumbers <- D835Ytruncated$accessions

  ITDtruncated <- truncated_motifs_of(ThirdSubstrateSet, "third substrate set")
  ITDmotifs <- ITDtruncated$motifs
  ITDAccessionNumbers <- ITDtruncated$accessions

  # name every motif with its accession number so it can be looked up later
  names(FTLwtmotifs) <- FTLwtAccessionNumbers
  names(D835Ymotifs) <- D835YAccessionNumbers
  names(ITDmotifs) <- ITDAccessionNumbers
}
790
791 ###############################################
792 #ALL motifs, full and truncated
793
if (FullMotifsOnly_questionmark != "YES" && TruncatedMotifsOnly_questionmark != "YES") {
  # Build the motif of every substrate in one table, full or truncated.
  #
  # substrate_set: data frame; column 3 is read as the accession number and
  #   columns 4:18 as the motif cells (cells equal to "XXXXX" are discarded).
  # set_label: text used in the error message to say which table failed.
  #
  # Returns a list of two one-column matrices, `motifs` and `accessions`, with
  # one row per row of substrate_set. Motifs with fewer than 7 letters on a
  # side of the central residue are padded with spaces on that side; motifs
  # that already have 7 or more on a side get no padding there.
  padded_motifs_of <- function(substrate_set, set_label) {
    motifs <- matrix(NA, nrow = nrow(substrate_set), ncol = 1)
    accessions <- matrix(NA, nrow = nrow(substrate_set), ncol = 1)

    # seq_len() makes an empty table a no-op; 1:nrow() would visit rows 1 and 0
    for (i in seq_len(nrow(substrate_set))) {
      cells <- substrate_set[i, 4:18]
      cells <- cells[cells != "XXXXX"]
      residues <- unlist(strsplit(paste(cells, collapse = ""), split = ""))

      # The lower-case "x" sits immediately before the central residue, so its
      # position gives the number of letters on either side.
      marker <- match(x = "x", table = residues)
      if (is.na(marker)) {
        # previously surfaced as "missing value where TRUE/FALSE needed"
        stop(
          "Row ", i, " of the ", set_label,
          " has no lower-case \"x\" marker in columns 4 to 18",
          call. = FALSE
        )
      }
      letters_left <- marker - 1
      # one less than length - marker because the central residue follows "x"
      letters_right <- length(residues) - marker - 1

      # Padding is clamped at zero, which covers both the truncated case
      # (pad up to 7) and the full case (no padding) in a single path.
      padded <- c(
        rep(" ", times = max(0, 7 - letters_left)),
        residues,
        rep(" ", times = max(0, 7 - letters_right))
      )
      # drop the "x" marker itself before joining the letters
      padded <- padded[!padded %in% "x"]
      motifs[i, 1] <- paste(padded, collapse = "")
      # The accession is read from the table being processed. The second and
      # third sets used to read it from FirstSubstrateSet, which labelled
      # their motifs with the wrong accession numbers.
      accessions[i, 1] <- substrate_set[i, 3]
    }

    list(motifs = motifs, accessions = accessions)
  }

  FTLwtall <- padded_motifs_of(FirstSubstrateSet, "first substrate set")
  FTLwtmotifs <- FTLwtall$motifs
  FTLwtAccessionNumbers <- FTLwtall$accessions

  D835Yall <- padded_motifs_of(SecondSubstrateSet, "second substrate set")
  D835Ymotifs <- D835Yall$motifs
  D835YAccessionNumbers <- D835Yall$accessions

  ITDall <- padded_motifs_of(ThirdSubstrateSet, "third substrate set")
  ITDmotifs <- ITDall$motifs
  ITDAccessionNumbers <- ITDall$accessions

  # name every motif with its accession number so it can be looked up later
  names(FTLwtmotifs) <- FTLwtAccessionNumbers
  names(D835Ymotifs) <- D835YAccessionNumbers
  names(ITDmotifs) <- ITDAccessionNumbers
}
944
945
# For each substrate set keep only the motifs that occur in neither of the
# other two sets, then drop repeats within the set. Logical subsetting and
# !duplicated() are used (rather than setdiff()/unique()) because they carry
# the accession-number names along with the surviving motifs.
FTLwtmotifsFINAL <- FTLwtmotifs[!(FTLwtmotifs %in% c(D835Ymotifs, ITDmotifs))]
FTLwtmotifsFINAL <- FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)]

ITDmotifsFINAL <- ITDmotifs[!(ITDmotifs %in% c(D835Ymotifs, FTLwtmotifs))]
ITDmotifsFINAL <- ITDmotifsFINAL[!duplicated(ITDmotifsFINAL)]

D835YmotifsFINAL <- D835Ymotifs[!(D835Ymotifs %in% c(FTLwtmotifs, ITDmotifs))]
D835YmotifsFINAL <- D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)]
959
960
# Collect, for every motif unique to the first substrate set, the columns of
# Firstsubbackfreq whose accession number (row 1 of the column) occurs in the
# motif's name. Each matching column becomes one row of FTLFinalMatrix; row 1
# of FTLFinalMatrix is an all-NA placeholder that the writer skips.
columnalheader <- rep(NA, 36)
FTLmatchedrows <- list(matrix(data = columnalheader, nrow = 1))

# seq_along()/seq_len() keep the loops from running when there is nothing to
# scan; 1:length(x) would iterate over 1 and 0 for an empty x
for (k in seq_along(FTLwtmotifsFINAL)) {
  motifname <- names(FTLwtmotifsFINAL[k])
  for (m in seq_len(ncol(Firstsubbackfreq))) {
    AN <- as.character(Firstsubbackfreq[1, m])
    # isTRUE() treats an NA or zero-length grepl() result (NA accession cell,
    # unnamed motif) as "no match" instead of aborting the if()
    if (isTRUE(grepl(pattern = AN, x = motifname, fixed = TRUE))) {
      outputmatrix <- matrix(as.character(Firstsubbackfreq[, m]), nrow = 1)
      FTLmatchedrows[[length(FTLmatchedrows) + 1]] <- outputmatrix
    }
  }
}

# bind once at the end instead of regrowing the matrix on every match
FTLFinalMatrix <- do.call(rbind, FTLmatchedrows)
# drop = FALSE keeps a matrix even when only one row survives de-duplication
FTLFinalMatrix <- FTLFinalMatrix[!duplicated(FTLFinalMatrix), , drop = FALSE]
981
# Collect, for every motif unique to the third substrate set, the columns of
# Thirdsubbackfreq whose accession number (row 1 of the column) occurs in the
# motif's name. Each matching column becomes one row of ITDFinalMatrix; row 1
# of ITDFinalMatrix is an all-NA placeholder that the writer skips.
columnalheader <- rep(NA, 36)
ITDmatchedrows <- list(matrix(data = columnalheader, nrow = 1))

# seq_along()/seq_len() keep the loops from running when there is nothing to
# scan; 1:length(x) would iterate over 1 and 0 for an empty x
for (k in seq_along(ITDmotifsFINAL)) {
  motifname <- names(ITDmotifsFINAL[k])
  for (m in seq_len(ncol(Thirdsubbackfreq))) {
    AN <- as.character(Thirdsubbackfreq[1, m])
    # isTRUE() treats an NA or zero-length grepl() result (NA accession cell,
    # unnamed motif) as "no match" instead of aborting the if()
    if (isTRUE(grepl(pattern = AN, x = motifname, fixed = TRUE))) {
      outputmatrix <- matrix(as.character(Thirdsubbackfreq[, m]), nrow = 1)
      ITDmatchedrows[[length(ITDmatchedrows) + 1]] <- outputmatrix
    }
  }
}

# bind once at the end instead of regrowing the matrix on every match
ITDFinalMatrix <- do.call(rbind, ITDmatchedrows)
# drop = FALSE keeps a matrix even when only one row survives de-duplication
ITDFinalMatrix <- ITDFinalMatrix[!duplicated(ITDFinalMatrix), , drop = FALSE]
1002
# Collect, for every motif unique to the second substrate set, the columns of
# Secondsubbackfreq whose accession number (row 1 of the column) occurs in the
# motif's name. Each matching column becomes one row of D835YFinalMatrix;
# row 1 of D835YFinalMatrix is an all-NA placeholder that the writer skips.
columnalheader <- rep(NA, 36)
D835Ymatchedrows <- list(matrix(data = columnalheader, nrow = 1))

# seq_along()/seq_len() keep the loops from running when there is nothing to
# scan; 1:length(x) would iterate over 1 and 0 for an empty x
for (k in seq_along(D835YmotifsFINAL)) {
  motifname <- names(D835YmotifsFINAL[k])
  for (m in seq_len(ncol(Secondsubbackfreq))) {
    AN <- as.character(Secondsubbackfreq[1, m])
    # isTRUE() treats an NA or zero-length grepl() result (NA accession cell,
    # unnamed motif) as "no match" instead of aborting the if()
    if (isTRUE(grepl(pattern = AN, x = motifname, fixed = TRUE))) {
      outputmatrix <- matrix(as.character(Secondsubbackfreq[, m]), nrow = 1)
      D835Ymatchedrows[[length(D835Ymatchedrows) + 1]] <- outputmatrix
    }
  }
}

# bind once at the end instead of regrowing the matrix on every match
D835YFinalMatrix <- do.call(rbind, D835Ymatchedrows)
# drop = FALSE keeps a matrix even when only one row survives de-duplication
D835YFinalMatrix <- D835YFinalMatrix[!duplicated(D835YFinalMatrix), , drop = FALSE]
1022
# Write the motifs unique to the first substrate set, and their
# background-frequency rows, to the two first-set output files.
# Column 1 of FTLoutputmatrix is the motif, column 2 its accession number.
FTLoutputmatrix <- matrix(
  data = c(FTLwtmotifsFINAL, names(FTLwtmotifsFINAL)),
  ncol = 2
)
FLTreference <- FTLoutputmatrix[, 2]

# The header is kept as a 1-row matrix so it is still written as a single row
# when there are no motifs (a bare vector would be written as a column).
FirstLine <- matrix(colnames(FirstSubstrateSet)[1:23], nrow = 1)
# One output row per motif: two empty fields, the accession number, one field
# per motif character, then five empty fields. seq_len() yields no rows for
# an empty matrix, where 1:nrow() would index row 1 and fail.
FTLmotifrows <- lapply(seq_len(nrow(FTLoutputmatrix)), function(q) {
  thismotif <- unlist(strsplit(FTLoutputmatrix[q, 1], ""))
  c("", "", FTLoutputmatrix[q, 2], thismotif, "", "", "", "", "")
})
# bind once instead of regrowing FirstLine on every iteration
FirstLine <- do.call(rbind, c(list(FirstLine), FTLmotifrows))

write.table(x = FirstLine,
            file = First_unshared_motifs_table,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Row labels come from column 1 of Firstsubbackfreq, the same table the data
# rows were collected from (this block used to read them from
# Thirdsubbackfreq).
columnalheader <- matrix(as.character(Firstsubbackfreq[1:36, 1]), nrow = 1)

write.table(x = columnalheader,
            file = First_unshared_subbackfreq,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Work on a copy that is guaranteed to be a matrix: FTLFinalMatrix may have
# been collapsed to a plain vector when only its placeholder row was left.
FTLdatarows <- FTLFinalMatrix
if (is.null(dim(FTLdatarows))) {
  FTLdatarows <- matrix(FTLdatarows, nrow = 1)
}
# Skip the all-NA placeholder in row 1. drop = FALSE keeps a single data row
# as a row; without it write.table() would emit that row as a column.
FTLdatarows <- FTLdatarows[-1, , drop = FALSE]

if (nrow(FTLdatarows) > 0) {
  write.table(x = FTLdatarows,
              file = First_unshared_subbackfreq,
              quote = FALSE, sep = ",",
              row.names = FALSE, col.names = FALSE, na = "", append = TRUE)
}
1057
1058 ############################################################################################################
1059
# Write the motifs unique to the second substrate set, and their
# background-frequency rows, to the two second-set output files.
# Column 1 of D835Youtputmatrix is the motif, column 2 its accession number.
D835Youtputmatrix <- matrix(
  data = c(D835YmotifsFINAL, names(D835YmotifsFINAL)),
  ncol = 2
)
FLTreference <- D835Youtputmatrix[, 2]

# Column names come from SecondSubstrateSet, the table these motifs were built
# from (this block used to read them from FirstSubstrateSet). The header is
# kept as a 1-row matrix so it is still written as a single row when there
# are no motifs (a bare vector would be written as a column).
FirstLine <- matrix(colnames(SecondSubstrateSet)[1:23], nrow = 1)
# One output row per motif: two empty fields, the accession number, one field
# per motif character, then five empty fields. seq_len() yields no rows for
# an empty matrix, where 1:nrow() would index row 1 and fail.
D835Ymotifrows <- lapply(seq_len(nrow(D835Youtputmatrix)), function(q) {
  thismotif <- unlist(strsplit(D835Youtputmatrix[q, 1], ""))
  c("", "", D835Youtputmatrix[q, 2], thismotif, "", "", "", "", "")
})
# bind once instead of regrowing FirstLine on every iteration
FirstLine <- do.call(rbind, c(list(FirstLine), D835Ymotifrows))

write.table(x = FirstLine,
            file = Second_unshared_motifs_table,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Row labels come from column 1 of Secondsubbackfreq, the same table the data
# rows were collected from (this block used to read them from
# Thirdsubbackfreq).
columnalheader <- matrix(as.character(Secondsubbackfreq[1:36, 1]), nrow = 1)

write.table(x = columnalheader,
            file = Second_unshared_subbackfreq,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Work on a copy that is guaranteed to be a matrix: D835YFinalMatrix may have
# been collapsed to a plain vector when only its placeholder row was left.
D835Ydatarows <- D835YFinalMatrix
if (is.null(dim(D835Ydatarows))) {
  D835Ydatarows <- matrix(D835Ydatarows, nrow = 1)
}
# Skip the all-NA placeholder in row 1. drop = FALSE keeps a single data row
# as a row; without it write.table() would emit that row as a column.
D835Ydatarows <- D835Ydatarows[-1, , drop = FALSE]

if (nrow(D835Ydatarows) > 0) {
  write.table(x = D835Ydatarows,
              file = Second_unshared_subbackfreq,
              quote = FALSE, sep = ",",
              row.names = FALSE, col.names = FALSE, na = "", append = TRUE)
}
1093
1094 ############################################################################################################
1095
# Write the motifs unique to the third substrate set, and their
# background-frequency rows, to the two third-set output files.
# Column 1 of ITDoutputmatrix is the motif, column 2 its accession number.
ITDoutputmatrix <- matrix(
  data = c(ITDmotifsFINAL, names(ITDmotifsFINAL)),
  ncol = 2
)
FLTreference <- ITDoutputmatrix[, 2]

# Column names come from ThirdSubstrateSet, the table these motifs were built
# from (this block used to read them from FirstSubstrateSet). The header is
# kept as a 1-row matrix so it is still written as a single row when there
# are no motifs (a bare vector would be written as a column).
FirstLine <- matrix(colnames(ThirdSubstrateSet)[1:23], nrow = 1)
# One output row per motif: two empty fields, the accession number, one field
# per motif character, then five empty fields. seq_len() yields no rows for
# an empty matrix, where 1:nrow() would index row 1 and fail.
ITDmotifrows <- lapply(seq_len(nrow(ITDoutputmatrix)), function(q) {
  thismotif <- unlist(strsplit(ITDoutputmatrix[q, 1], ""))
  c("", "", ITDoutputmatrix[q, 2], thismotif, "", "", "", "", "")
})
# bind once instead of regrowing FirstLine on every iteration
FirstLine <- do.call(rbind, c(list(FirstLine), ITDmotifrows))

write.table(x = FirstLine,
            file = Third_unshared_motifs_table,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Row labels: the first 36 entries of column 1 of Thirdsubbackfreq, the table
# the data rows below were collected from.
columnalheader <- matrix(as.character(Thirdsubbackfreq[1:36, 1]), nrow = 1)

write.table(x = columnalheader,
            file = Third_unshared_subbackfreq,
            quote = FALSE, sep = ",",
            row.names = FALSE, col.names = FALSE, na = "", append = TRUE)

# Work on a copy that is guaranteed to be a matrix: ITDFinalMatrix may have
# been collapsed to a plain vector when only its placeholder row was left.
ITDdatarows <- ITDFinalMatrix
if (is.null(dim(ITDdatarows))) {
  ITDdatarows <- matrix(ITDdatarows, nrow = 1)
}
# Skip the all-NA placeholder in row 1. drop = FALSE keeps a single data row
# as a row; without it write.table() would emit that row as a column.
ITDdatarows <- ITDdatarows[-1, , drop = FALSE]

if (nrow(ITDdatarows) > 0) {
  write.table(x = ITDdatarows,
              file = Third_unshared_subbackfreq,
              quote = FALSE, sep = ",",
              row.names = FALSE, col.names = FALSE, na = "", append = TRUE)
}
1128
1129 }