Mercurial > repos > jfb > difference_finder
comparison all stuff/Difference finderMADE 7 TO 7 fastversion.R @ 0:cd1ddf7db611 draft
Uploaded
| author | jfb |
|---|---|
| date | Tue, 22 Jan 2019 16:00:05 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cd1ddf7db611 |
|---|---|
| 1 #I should make an SOP for this. Problems we encountered: no x in the xY motif, and the kilodemon | |
| 2 #the output files contain both Y and xY; they shouldn't. Why is that happening? Make it not happen | |
| 3 #make sure that accession numbers stay locked to each motif, somehow | |
| 4 #output should look just like the KALIP input | |
| 5 #S2 is negatives | |
| 6 #If you want ONLY FULL MOTIFS, put "YES" here, please use all caps | |
| 7 FullMotifsOnly_questionmark<-"NO" | |
| 8 #If you want ONLY TRUNCATED MOTIFS, put "YES" here, please use all caps | |
| 9 TruncatedMotifsOnly_questionmark<-"NO" | |
| 10 #if you want to find the overlap, put a "YES" here (all caps), if you want to find the non-overlap, put "NO" (all caps) | |
| 11 Are_You_Looking_For_Commonality<-"NO" | |
| 12 | |
| 13 | |
| 14 #put the names of your input files here (the substrate tables are read as text, see the note below) | |
| 15 FirstSubstrateSet<- read.csv("S1.csv", stringsAsFactors=FALSE, colClasses="character") | |
| 16 Firstsubbackfreq<- read.csv("SBF1.csv", header=FALSE, stringsAsFactors=FALSE) | |
| 17 #colClasses="character" keeps one-letter residue columns as text: by default read.csv turns a column holding only T/F into logical TRUE/FALSE and an all-blank column into NA | |
| 18 SecondSubstrateSet<- read.csv("S2.csv", stringsAsFactors=FALSE, colClasses="character") | |
| 19 Secondsubbackfreq<- read.csv("SBF2.csv", header=FALSE, stringsAsFactors=FALSE) | |
| 20 | |
| 21 # ThirdSubstrateSet<- read.csv("Galaxy69-BTK_PLUS_R3_Substrates.csv", stringsAsFactors=FALSE) | |
| 22 # Thirdsubbackfreq<- read.csv("Galaxy70-BTK_PLUS_R3_SubstrateBackgroundFrequency.csv", header=FALSE, stringsAsFactors=FALSE) | |
| 23 | |
| 24 #then put the names of your output files here | |
| 25 # Shared_motifs_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-substrates.csv" | |
| 26 # Shared_subbackfreq_table<-"180719_GALAXY-BTK-plus-rep-OVLP-7to7-SubBackFreq.csv" | |
| 27 | |
| 28 # Shared_motifs_table<-"Shared motifs 7-27-17.csv" | |
| 29 # Shared_subbackfreq_table<-"SubstrateBackgrounFrequency-for-shared-motifs 4 7-27-17.csv" | |
| 30 | |
| 31 First_unshared_motifs_table<-"1RS.csv" | |
| 32 First_unshared_subbackfreq<-"1RSBF.csv" | |
| 33 | |
| 34 Second_unshared_motifs_table<-"2RS.csv" | |
| 35 Second_unshared_subbackfreq<-"2RSBF.csv" | |
| 36 | |
| 37 # Third_unshared_motifs_table<-"R3 subs.csv" | |
| 38 # Third_unshared_subbackfreq<-"R3 SBF.csv" | |
| 39 | |
| 40 #final note, this code is going to be unworkable if you want to make a Venn diagram of more than 3 circles. I think I'll poke around | |
| 41 #other languages to see if any of them can do it. | |
| 42 #################################################################################################################################### | |
| 43 | |
| 44 | |
| 45 | |
| 46 | |
| 47 | |
| 48 # grepl(pattern = "S", x=asdf, ignore.case = TRUE) | |
| 49 | |
| 50 #Column 11 is motif position 0 (columns 4:18 hold positions -7..+7), i.e. the central residue | |
| 51 #A central S, T or Y is tagged with a leading "x" (xS/xT/xY) in both substrate sets | |
| 52 #grepl() is already vectorised, so it is called on the whole column instead of through sapply(), | |
| 53 #which returns a list (unusable as an index) when the column is empty | |
| 54 FirstCentralLetters<-FirstSubstrateSet[,11] | |
| 55 SecondCentralLetters<-SecondSubstrateSet[,11] | |
| 56 #all six masks are computed from the untouched letters before anything is replaced | |
| 57 FirstEsses<-grepl(pattern="S", x=FirstCentralLetters, ignore.case=TRUE) | |
| 58 FirstTees<-grepl(pattern="T", x=FirstCentralLetters, ignore.case=TRUE) | |
| 59 FirstWys<-grepl(pattern="Y", x=FirstCentralLetters, ignore.case=TRUE) | |
| 60 SecondEsses<-grepl(pattern="S", x=SecondCentralLetters, ignore.case=TRUE) | |
| 61 SecondTees<-grepl(pattern="T", x=SecondCentralLetters, ignore.case=TRUE) | |
| 62 SecondWys<-grepl(pattern="Y", x=SecondCentralLetters, ignore.case=TRUE) | |
| 63 FirstCentralLetters<-replace(FirstCentralLetters,FirstEsses,"xS") | |
| 64 FirstCentralLetters<-replace(FirstCentralLetters,FirstTees,"xT") | |
| 65 FirstCentralLetters<-replace(FirstCentralLetters,FirstWys,"xY") | |
| 66 SecondCentralLetters<-replace(SecondCentralLetters,SecondEsses,"xS") | |
| 67 SecondCentralLetters<-replace(SecondCentralLetters,SecondTees,"xT") | |
| 68 SecondCentralLetters<-replace(SecondCentralLetters,SecondWys,"xY") | |
| 69 FirstSubstrateSet[,11]<-FirstCentralLetters | |
| 70 SecondSubstrateSet[,11]<-SecondCentralLetters | |
| 71 | |
| 72 #################################################################################################################################### | |
| 73 #################################################################################################################################### | |
| 74 # better version of this code written in C: what happens when two kinases share a motif, but they found that motif in two | |
| 75 # separate proteins thus two separate accession numbers? | |
| 76 # It should actually output the shared motif and BOTH accession numbers. Right now it does not, it only maps out the second | |
| 77 # accession number. So that needs to be fixed BUT you need to keep the commonality between a motif and its accession number | |
| 78 #################################################################################################################################### | |
| 79 #################################################################################################################################### | |
| 80 #################################################################################################################################### | |
| 81 #################################################################################################################################### | |
| 82 | |
| 83 #Create the motif sets, deciding whether you're looking for truncated or full motifs here | |
| 84 #full only | |
| 85 | |
| 86 | |
| 87 if (Are_You_Looking_For_Commonality=="NO"){ | |
| 88 | |
| 89 ############################################### | |
| 90 #ALL motifs, full and truncated | |
| 91 | |
| 92 if (FullMotifsOnly_questionmark!="YES"&&TruncatedMotifsOnly_questionmark!="YES"){ | |
| 93 # FTLwtmotifs=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
| 94 # FTLwtAccessionNumbers=matrix(,nrow = nrow(FirstSubstrateSet),ncol=1) | |
| 95 | |
| 96 #What this branch does: | |
| 97 #take the 15 motif columns (4:18, positions -7..+7) of each substrate table | |
| 98 #paste each row into one motif string, which is what the difference finding compares | |
| 99 #keep motifs and accession numbers (column 3) together by naming each motif with its accession number | |
| 100 | |
| 101 #Blank positions are replaced with "O" before pasting so that every position still occupies one character | |
| 102 #Cells that were read as NA are treated as blank too, otherwise paste would write the text "NA" into the motif | |
| 103 #The "O" is turned back into a space before the motifs are written to the CSV | |
| 104 | |
| 105 FTLwtmotifs<-FirstSubstrateSet[,4:18] | |
| 106 # FTLwtmotifs<-sapply(FTLwtmotifs, gsub, pattern=" ", replacement="O",ignore.case=TRUE) | |
| 107 FTLwtmotifs<-replace(FTLwtmotifs, is.na(FTLwtmotifs)|FTLwtmotifs=="", "O") | |
| 108 FTLwtmotifs<-apply(FTLwtmotifs, 1, paste, sep="", collapse="") | |
| 109 FTLwtAccessionNumbers<-FirstSubstrateSet[,3] | |
| 110 | |
| 111 # D835Ymotifs=matrix(,nrow = nrow(SecondSubstrateSet),ncol=1) | |
| 112 # D835YAccessionNumbers<-matrix(,nrow = nrow(SecondSubstrateSet),ncol = 1) | |
| 113 | |
| 114 D835Ymotifs<-SecondSubstrateSet[,4:18] | |
| 115 # D835Ymotifs<-sapply(D835Ymotifs, gsub, patter="", replacement="O",ignore.case=TRUE) | |
| 116 D835Ymotifs<-replace(D835Ymotifs, is.na(D835Ymotifs)|D835Ymotifs=="", "O") | |
| 117 D835Ymotifs<-apply(D835Ymotifs, 1, paste, sep="", collapse="") | |
| 118 D835YAccessionNumbers<-SecondSubstrateSet[,3] | |
| 119 | |
| 120 #name each motif by its accession number so the pairing survives the subsetting done below | |
| 121 names(FTLwtmotifs)<-FTLwtAccessionNumbers | |
| 122 names(D835Ymotifs)<-D835YAccessionNumbers | |
| 123 #fail loudly for the other settings: nothing builds the motif sets for them, and stale objects from an earlier session must not be reused | |
| 124 } else { | |
| 125 stop("No motif sets are built when FullMotifsOnly_questionmark or TruncatedMotifsOnly_questionmark is \"YES\"; set both to \"NO\"", call.=FALSE) | |
| 126 } | |
| 127 FTLwtmotifsFINAL<-FTLwtmotifs[!FTLwtmotifs %in% D835Ymotifs] | |
| 128 FTLwtmotifsFINAL<-FTLwtmotifsFINAL[!duplicated(FTLwtmotifsFINAL)] | |
| 129 | |
| 130 D835YmotifsFINAL<-D835Ymotifs[!D835Ymotifs %in% FTLwtmotifs] | |
| 131 D835YmotifsFINAL<-D835YmotifsFINAL[!duplicated(D835YmotifsFINAL)] | |
| 132 | |
| 133 | |
| 134 | |
| 135 # columnalheader<-c(rep(NA,36)) | |
| 136 # FTLFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
| 137 # | |
| 138 #how to make this an apply | |
| 139 #actual first turn names(motifsFINAL) into a vector, then gsub to replace all "|" pipe characters with "*" star characters | |
| 140 #first take the accession numbers as Firstsubbackfreq[1,], then gsub to replace all "|" pipe characters with "*" star characters | |
| 141 #then use AccessionNumbers as a vector and use | |
| 142 #matches <- unique (grep(paste(toMatch,collapse="|"), names(motifsFINAL), value=TRUE)) | |
| 143 | |
| 144 FLTwtNames<-names(FTLwtmotifsFINAL) | |
| 145 FLTwtNames<-FLTwtNames[!duplicated(FLTwtNames)] | |
| 146 #the problem is that the names include a lot of entries that are multiple names separated by ; | |
| 147 #I need to do some paper writing and poster writing | |
| 148 | |
| 149 # FLTwtNames<-gsub(pattern = '\\|', replacement = "*", x=FLTwtNames) | |
| 150 # FLTwtNames<-paste(FLTwtNames,collapse = "|") | |
| 151 | |
| 152 AccNums<-as.character(Firstsubbackfreq[1,]) | |
| 153 # AccNums<-gsub(pattern = "\\|", replacement = "*", x=AccNums) | |
| 154 | |
| 155 # my_matches<-unique(grep(pattern = FLTwtNames, x=AccNums, value = TRUE)) | |
| 156 # matches <- unique (grep(paste(FLTwtNames,collapse="|"), AccNums, value=TRUE)) | |
| 157 My_matches<- AccNums %in% FLTwtNames | |
| 158 #My_matches flags the SBF columns whose accession number (row 1) still has a motif | |
| 159 FirstFINALSBF<-Firstsubbackfreq[My_matches] | |
| 160 #drop repeated accession numbers, keeping the first column for each one. duplicated() on a | |
| 161 #one-row data frame compares rows, not columns, so the accession vector itself is tested; | |
| 162 #single-bracket indexing keeps a data frame even when only one column remains | |
| 163 duped<-duplicated(AccNums[My_matches]) | |
| 164 unduped<-FirstFINALSBF[!duped] | |
| 165 FTLFinalMatrix<-unduped | |
| 166 | |
| 167 # FTLFinalMatrix<-FTLFinalMatrix[2:nrow(FTLFinalMatrix),] | |
| 168 | |
| 169 # columnalheader<-c(rep(NA,36)) | |
| 170 # D835YFinalMatrix<-matrix(data =columnalheader,nrow = 1) | |
| 171 | |
| 172 D835names<-names(D835YmotifsFINAL) | |
| 173 D835names<-D835names[!duplicated(D835names)] | |
| 174 AccNums<-as.character(Secondsubbackfreq[1,]) | |
| 175 My_matches<-AccNums %in% D835names | |
| 176 SecondFINALSBF<-Secondsubbackfreq[My_matches] | |
| 177 #drop repeated accession numbers, keeping the first column for each one. duplicated() on a | |
| 178 #one-row data frame compares rows, not columns, so the accession vector itself is tested; | |
| 179 #single-bracket indexing keeps a data frame even when only one column remains | |
| 180 duped<-duplicated(AccNums[My_matches]) | |
| 181 unduped<-SecondFINALSBF[!duped] | |
| 182 D835YFinalMatrix<-unduped | |
| 183 | |
| 184 #there's a problem, accession numbers are duplicated | |
| 185 | |
| 186 # FTLoutputmatrix<-matrix(data=c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)),ncol = 2) | |
| 187 | |
| 188 FTLwtmotifsFINAL<-gsub(pattern = "O", replacement = " ", x=FTLwtmotifsFINAL) | |
| 189 FTLwtmotifsFINAL<-gsub(pattern = "x", replacement = "", x=FTLwtmotifsFINAL) | |
| 190 FTLoutputmatrix<-matrix(data = c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)), ncol=2) | |
| 191 | |
| 192 # FTLwtmotifsFINAL<-gsub(pattern = "O", replacement = " ", x=FTLwtmotifsFINAL) | |
| 193 # FTLwtmotifsFINAL<-gsub(pattern = "x", replacement = "", x=FTLwtmotifsFINAL) | |
| 194 # FTLoutputmatrix<-matrix(data = c(FTLwtmotifsFINAL,names(FTLwtmotifsFINAL)), ncol=2) | |
| 195 | |
| 196 FLTheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite") | |
| 197 # FLTheader<-unlist(FLTheader) | |
| 198 lefthandFLT<-matrix(data = rep(NA,times=2*nrow(FTLoutputmatrix)),nrow=nrow(FTLoutputmatrix)) | |
| 199 righthandFLT<-matrix(data = rep(NA,times=1*nrow(FTLoutputmatrix)),nrow=nrow(FTLoutputmatrix)) | |
| 200 FLTaccessionset<-FTLoutputmatrix[,2] | |
| 201 FTLmeat<-sapply(FTLoutputmatrix[,1], strsplit, "") | |
| 202 FTLmeat<-sapply(FTLmeat, unlist) | |
| 203 colnames(FTLmeat)<-NULL | |
| 204 FTLmeat<-t(FTLmeat) | |
| 205 | |
| 206 FTLoutputmatrix2<-cbind(lefthandFLT,FLTaccessionset,FTLmeat,righthandFLT) | |
| 207 colnames(FTLoutputmatrix2)<-NULL | |
| 208 rownames(FTLoutputmatrix2)<-NULL | |
| 209 colnames(FLTheader)<-NULL | |
| 210 rownames(FLTheader)<-NULL | |
| 211 | |
| 212 | |
| 213 #column 11 of the output matrix is motif position 0; the "x" marker was stripped from the pasted motifs earlier, so it is put back on a central S, T or Y here | |
| 214 FirstCentralLettersAGAIN<-FTLoutputmatrix2[,11] | |
| 215 #grepl() is vectorised, so the whole column is tested in one call per letter | |
| 216 FirstEsses<-grepl(pattern="S", x=FirstCentralLettersAGAIN, ignore.case=TRUE) | |
| 217 FirstTees<-grepl(pattern="T", x=FirstCentralLettersAGAIN, ignore.case=TRUE) | |
| 218 FirstWys<-grepl(pattern="Y", x=FirstCentralLettersAGAIN, ignore.case=TRUE) | |
| 219 FirstCentralLettersAGAIN[FirstEsses]<-"xS" | |
| 220 FirstCentralLettersAGAIN[FirstTees]<-"xT" | |
| 221 FirstCentralLettersAGAIN[FirstWys]<-"xY" | |
| 222 | |
| 223 FTLoutputmatrix2[,11]<-FirstCentralLettersAGAIN | |
| 224 | |
| 225 FTLoutputmatrix2<-rbind(FLTheader,FTLoutputmatrix2) | |
| 226 | |
| 227 write.table(x=FTLoutputmatrix2, | |
| 228 file=First_unshared_motifs_table, | |
| 229 quote=FALSE, sep=",", | |
| 230 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 231 | |
| 232 columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1])) | |
| 233 columnalheader<-matrix(columnalheader,ncol = 1) | |
| 234 # write.table(x=columnalheader, | |
| 235 # file=First_unshared_subbackfreq, | |
| 236 # quote=FALSE, sep=",", | |
| 237 # row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 238 | |
| 239 FTLFinalMatrix<-cbind(columnalheader,FTLFinalMatrix) | |
| 240 | |
| 241 write.table(x=FTLFinalMatrix, | |
| 242 file=First_unshared_subbackfreq, | |
| 243 quote=FALSE, sep=",", | |
| 244 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 245 | |
| 246 ############################################################################################################ | |
| 247 | |
| 248 D835YmotifsFINAL<-gsub(pattern = "O", replacement = " ", x=D835YmotifsFINAL) | |
| 249 D835YmotifsFINAL<-gsub(pattern = "x", replacement = "", x=D835YmotifsFINAL) | |
| 250 D835Youtputmatrix<-matrix(data = c(D835YmotifsFINAL,names(D835YmotifsFINAL)), ncol=2) | |
| 251 | |
| 252 # D835YmotifsFINAL<-gsub(pattern = "O", replacement = " ", x=D835YoutputmatrixL) | |
| 253 # D835YmotifsFINAL<-gsub(pattern = "x", replacement = "", x=D835Youtputmatrix) | |
| 254 # D835Youtputmatrix<-matrix(data = c(D835Youtputmatrix,names(D835Youtputmatrix)), ncol=2) | |
| 255 # | |
| 256 | |
| 257 # D835Youtputmatrix<-matrix(data=c(D835YmotifsFINAL,names(D835YmotifsFINAL)),ncol = 2) | |
| 258 | |
| 259 D835Yheader<-c("Substrate","Species","Reference","-7","-6","-5","-4","-3","-2","-1","0","1","2","3","4","5","6","7","Phosphite") | |
| 260 # D835Yheader<-unlist(D835Yheader) | |
| 261 lefthandD835<-matrix(data = rep(NA,times=2*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix)) | |
| 262 righthandD835<-matrix(data = rep(NA,times=1*nrow(D835Youtputmatrix)),nrow=nrow(D835Youtputmatrix)) | |
| 263 D835Yaset<-D835Youtputmatrix[,2] | |
| 264 D835meat<-sapply(D835Youtputmatrix[,1], strsplit, "") | |
| 265 D835meat<-sapply(D835meat, unlist) | |
| 266 colnames(D835meat)<-NULL | |
| 267 D835meat<-t(D835meat) | |
| 268 | |
| 269 # FTLmeat<-sapply(FTLoutputmatrix[,1], strsplit, "") | |
| 270 # FTLmeat<-sapply(FTLmeat, unlist) | |
| 271 # colnames(FTLmeat)<-NULL | |
| 272 # FTLmeat<-t(FTLmeat) | |
| 273 | |
| 274 | |
| 275 D835Youtputmatrix2<-cbind(lefthandD835,D835Yaset,D835meat,righthandD835) | |
| 276 colnames(D835Youtputmatrix2)<-NULL | |
| 277 rownames(D835Youtputmatrix2)<-NULL | |
| 278 colnames(D835Yheader)<-NULL | |
| 279 rownames(D835Yheader)<-NULL | |
| 280 | |
| 281 | |
| 282 #column 11 of the output matrix is motif position 0; the "x" marker was stripped from the pasted motifs earlier, so it is put back on a central S, T or Y here | |
| 283 SecondCentralLettersAGAIN<-D835Youtputmatrix2[,11] | |
| 284 #grepl() is vectorised, so the whole column is tested in one call per letter | |
| 285 SecondEsses<-grepl(pattern="S", x=SecondCentralLettersAGAIN, ignore.case=TRUE) | |
| 286 SecondTees<-grepl(pattern="T", x=SecondCentralLettersAGAIN, ignore.case=TRUE) | |
| 287 SecondWys<-grepl(pattern="Y", x=SecondCentralLettersAGAIN, ignore.case=TRUE) | |
| 288 SecondCentralLettersAGAIN[SecondEsses]<-"xS" | |
| 289 SecondCentralLettersAGAIN[SecondTees]<-"xT" | |
| 290 SecondCentralLettersAGAIN[SecondWys]<-"xY" | |
| 291 | |
| 292 D835Youtputmatrix2[,11]<-SecondCentralLettersAGAIN | |
| 293 | |
| 294 D835Youtputmatrix2<-rbind(D835Yheader,D835Youtputmatrix2) | |
| 295 | |
| 296 write.table(x=D835Youtputmatrix2, | |
| 297 file=Second_unshared_motifs_table, | |
| 298 quote=FALSE, sep=",", | |
| 299 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 300 | |
| 301 columnalheader<-c("Accession Numbers",as.character(Firstsubbackfreq[1:35,1])) | |
| 302 columnalheader<-matrix(columnalheader,ncol = 1) | |
| 303 # write.table(x=columnalheader, | |
| 304 # file=Second_unshared_subbackfreq, | |
| 305 # quote=FALSE, sep=",", | |
| 306 # row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 307 | |
| 308 D835YFinalMatrix<-cbind(columnalheader,D835YFinalMatrix) | |
| 309 | |
| 310 write.table(x=D835YFinalMatrix, | |
| 311 file=Second_unshared_subbackfreq, | |
| 312 quote=FALSE, sep=",", | |
| 313 row.names=FALSE,col.names = FALSE, na="", append=TRUE) | |
| 314 } | |
| 315 | |
| 316 | |
| 317 | |
| 318 # for (i in 1:nrow(FirstSubstrateSet)){ | |
| 319 # FTLwtletters<-FirstSubstrateSet[i,4:18] | |
| 320 # FTLwtletters<-FTLwtletters[FTLwtletters !="XXXXX"] | |
| 321 # FTLwtletters<-paste(FTLwtletters, sep="", collapse="") | |
| 322 # leftspaces<-c() | |
| 323 # rightspaces<-c() | |
| 324 # | |
| 325 # YYYmotif <- unlist(strsplit(FTLwtletters, split = "")) | |
| 326 # YYYposition <- match(x = "x", table = YYYmotif) | |
| 327 # #the position of the x tells me how many letters are to its left: x at position 4 means there are | |
| 328 # #just 3 letters to the left of x | |
| 329 # | |
| 330 # YYYLettersToTheLeft <- YYYposition - 1 | |
| 331 # #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 332 # #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 333 # YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 334 # #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
| 335 # #variable the user puts in is | |
| 336 # | |
| 337 # | |
| 338 # if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { | |
| 339 # leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 340 # rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 341 # #add blank spaces if the motif has less than 4 letters to the left/right | |
| 342 # motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 343 # #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
| 344 # motif<-motif[!motif %in% "x"] | |
| 345 # motif<-paste(motif, sep="", collapse="") | |
| 346 # FTLwtletters<-motif | |
| 347 # FTLwtmotifs[i,1]<-FTLwtletters | |
| 348 # FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
| 349 # } | |
| 350 # | |
| 351 # if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ | |
| 352 # motif<-YYYmotif | |
| 353 # #add blank spaces if the motif has less than 4 letters to the left/right | |
| 354 # motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 355 # #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
| 356 # motif<-motif[!motif %in% "x"] | |
| 357 # motif<-paste(motif, sep="", collapse="") | |
| 358 # FTLwtletters<-motif | |
| 359 # FTLwtmotifs[i,1]<-FTLwtletters | |
| 360 # FTLwtAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
| 361 # | |
| 362 # | |
| 363 # } | |
| 364 # | |
| 365 # } | |
| 366 | |
| 367 | |
| 368 | |
| 369 # for (i in 1:nrow(SecondSubstrateSet)){ | |
| 370 # D835letters<-SecondSubstrateSet[i,4:18] | |
| 371 # D835letters<-D835letters[D835letters !="XXXXX"] | |
| 372 # D835letters<-paste(D835letters, sep="", collapse="") | |
| 373 # leftspaces<-c() | |
| 374 # rightspaces<-c() | |
| 375 # | |
| 376 # YYYmotif <- unlist(strsplit(D835letters, split = "")) | |
| 377 # YYYposition <- match(x = "x", table = YYYmotif) | |
| 378 # #the position of the x tells me how many letters are to its left: x at position 4 means there are | |
| 379 # #just 3 letters to the left of x | |
| 380 # | |
| 381 # YYYLettersToTheLeft <- YYYposition - 1 | |
| 382 # #how many letters to the right SHOULD just be length(motif)-position-1 if it's 5 long and x is at 3 then Y is at 4 and there is | |
| 383 # #just 1 spot to the right of Y so LettersToTheRight<-1 because 5-3-1=1 | |
| 384 # YYYLettersToTheRight <- length(YYYmotif) - YYYposition - 1 | |
| 385 # #then sanity check, we're currently looking only at +/-4, but this spot allows for up to +/- 7 as well, just depends on what the | |
| 386 # #variable the user puts in is | |
| 387 # if (YYYLettersToTheLeft < 7 | YYYLettersToTheRight < 7) { | |
| 388 # leftspaces<-rep(" ",times=(7-YYYLettersToTheLeft)) | |
| 389 # rightspaces<-rep(" ",times=7-(YYYLettersToTheRight)) | |
| 390 # #add blank spaces if the motif has less than 4 letters to the left/right | |
| 391 # motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 392 # #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
| 393 # motif<-motif[!motif %in% "x"] | |
| 394 # motif<-paste(motif, sep="", collapse="") | |
| 395 # D835letters<-motif | |
| 396 # D835Ymotifs[i,1]<-D835letters | |
| 397 # D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
| 398 # } | |
| 399 # | |
| 400 # if(YYYLettersToTheLeft>6 && YYYLettersToTheRight>6){ | |
| 401 # motif<-YYYmotif | |
| 402 # #add blank spaces if the motif has less than 4 letters to the left/right | |
| 403 # motif<-c(leftspaces,YYYmotif,rightspaces) | |
| 404 # #save that motif, which is the Y and +/- 4 amino acids, including truncation | |
| 405 # motif<-motif[!motif %in% "x"] | |
| 406 # motif<-paste(motif, sep="", collapse="") | |
| 407 # D835letters<-motif | |
| 408 # D835Ymotifs[i,1]<-D835letters | |
| 409 # D835YAccessionNumbers[i,1]<-FirstSubstrateSet[i,3] | |
| 410 # } | |
| 411 # } | |
| 412 | |
| 413 # for (k in 1:length(FTLwtmotifsFINAL)) { | |
| 414 # AN<-00000 | |
| 415 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is | |
| 416 # #destroyed immediately after use | |
| 417 # for (m in 1:ncol(Firstsubbackfreq)) { | |
| 418 # AN <- as.character(Firstsubbackfreq[1, m]) | |
| 419 # if (grepl(pattern = AN, | |
| 420 # x = names(FTLwtmotifsFINAL[k]), | |
| 421 # fixed = TRUE) == TRUE) { | |
| 422 # outputmatrix <- as.character(Firstsubbackfreq[, m]) | |
| 423 # outputmatrix <- matrix(outputmatrix, nrow = 1) | |
| 424 # #with that accession number, find a match in the subbackfreq file and save it here | |
| 425 # FTLFinalMatrix<-rbind(FTLFinalMatrix,outputmatrix) | |
| 426 # } | |
| 427 # } | |
| 428 # } | |
| 429 # for (k in 1:length(D835YmotifsFINAL)) { | |
| 430 # #I don't remember why, but I felt it necessary to destroy the accession number multiple times to ensure it is | |
| 431 # #destroyed immediately after use | |
| 432 # for (m in 1:ncol(Secondsubbackfreq)) { | |
| 433 # AN <- as.character(Secondsubbackfreq[1, m]) | |
| 434 # if (grepl(pattern = AN, | |
| 435 # x = names(D835YmotifsFINAL[k]), | |
| 436 # fixed = TRUE) == TRUE) { | |
| 437 # outputmatrix <- as.character(Secondsubbackfreq[, m]) | |
| 438 # outputmatrix <- matrix(outputmatrix, nrow = 1) | |
| 439 # #with that accession number, find a match in the subbackfreq file and save it here | |
| 440 # D835YFinalMatrix<-rbind(D835YFinalMatrix,outputmatrix) | |
| 441 # } | |
| 442 # } | |
| 443 # } |
