Mercurial > repos > jfb > kinatest_7_7
changeset 3:ef1c1412e275 draft
Uploaded
| author | jfb |
|---|---|
| date | Tue, 27 Feb 2018 16:51:29 -0500 |
| parents | 00e1ca8f787c |
| children | 0ce565b45cb3 |
| files | kinatestid_r/Kinatest-R.R kinatestid_r/kinatestid_r.xml kinatestid_r/screener7-7.csv |
| diffstat | 3 files changed, 181 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/kinatestid_r/Kinatest-R.R Tue Feb 27 15:24:12 2018 -0500 +++ b/kinatestid_r/Kinatest-R.R Tue Feb 27 16:51:29 2018 -0500 @@ -18,9 +18,8 @@ Sub<-na.omit(SubstrateBackgroundFrequency) SubstrateBackgroundFrequency<-Sub - - - +args = commandArgs(trailingOnly=TRUE) +TodaysKinase<-args[1] @@ -1161,25 +1160,25 @@ Positionm5<-which(goodones[,3] %in% TRUE) if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} Positionm4<-which(goodones[,4] %in% TRUE) -if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))} +if (length(Positionm4)<2){Positionm4<-order(bareSDs[,4],decreasing=TRUE)[1:2]} Positionm3<-which(goodones[,5] %in% TRUE) -if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))} +if (length(Positionm3)<2){Positionm3<-order(bareSDs[,5],decreasing=TRUE)[1:2]} Positionm2<-which(goodones[,6] %in% TRUE) -if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))} +if (length(Positionm2)<2){Positionm2<-order(bareSDs[,6],decreasing=TRUE)[1:2]} Positionm1<-which(goodones[,7] %in% TRUE) -if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))} +if (length(Positionm1)<2){Positionm1<-order(bareSDs[,7],decreasing=TRUE)[1:2]} Positiond0<-which(goodones[,8] %in% TRUE) if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} Positionp1<-which(goodones[,9] %in% TRUE) -if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))} +if (length(Positionp1)<2){Positionp1<-order(bareSDs[,9],decreasing=TRUE)[1:2]} Positionp2<-which(goodones[,10] %in% TRUE) -if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))} +if (length(Positionp2)<2){Positionp2<-order(bareSDs[,10],decreasing=TRUE)[1:2]} Positionp3<-which(goodones[,11] %in% TRUE) -if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))} +if (length(Positionp3)<2){Positionp3<-order(bareSDs[,11],decreasing=TRUE)[1:2]} Positionp4<-which(goodones[,12] %in% TRUE) -if 
(length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))} +if (length(Positionp4)<2){Positionp4<-order(bareSDs[,12],decreasing=TRUE)[1:2]} Positionp5<-which(goodones[,13] %in% TRUE) if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} Positionp6<-which(goodones[,14] %in% TRUE) @@ -1187,6 +1186,74 @@ Positionp7<-which(goodones[,15] %in% TRUE) if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} + +# Positionm7<-which(goodones[,1] %in% TRUE) +# if (length(Positionm7)<1){Positionm7<-which(bareSDs[,1]==max(bareSDs[,1]))} +# Positionm6<-which(goodones[,2] %in% TRUE) +# if (length(Positionm6)<1){Positionm6<-which(bareSDs[,2]==max(bareSDs[,2]))} +# Positionm5<-which(goodones[,3] %in% TRUE) +# if (length(Positionm5)<1){Positionm5<-which(bareSDs[,3]==max(bareSDs[,3]))} +# Positionm4<-which(goodones[,4] %in% TRUE) +# if (length(Positionm4)<1){Positionm4<-which(bareSDs[,4]==max(bareSDs[,4]))} +# Positionm3<-which(goodones[,5] %in% TRUE) +# if (length(Positionm3)<1){Positionm3<-which(bareSDs[,5]==max(bareSDs[,5]))} +# Positionm2<-which(goodones[,6] %in% TRUE) +# if (length(Positionm2)<1){Positionm2<-which(bareSDs[,6]==max(bareSDs[,6]))} +# Positionm1<-which(goodones[,7] %in% TRUE) +# if (length(Positionm1)<1){Positionm1<-which(bareSDs[,7]==max(bareSDs[,7]))} +# +# Positiond0<-which(goodones[,8] %in% TRUE) +# if (length(Positiond0)<1){Positiond0<-which(bareSDs[,8]==max(bareSDs[,8]))} +# +# Positionp1<-which(goodones[,9] %in% TRUE) +# if (length(Positionp1)<1){Positionp1<-which(bareSDs[,9]==max(bareSDs[,9]))} +# Positionp2<-which(goodones[,10] %in% TRUE) +# if (length(Positionp2)<1){Positionp2<-which(bareSDs[,10]==max(bareSDs[,10]))} +# Positionp3<-which(goodones[,11] %in% TRUE) +# if (length(Positionp3)<1){Positionp3<-which(bareSDs[,11]==max(bareSDs[,11]))} +# Positionp4<-which(goodones[,12] %in% TRUE) +# if (length(Positionp4)<1){Positionp4<-which(bareSDs[,12]==max(bareSDs[,12]))} +# 
Positionp5<-which(goodones[,13] %in% TRUE) +# if (length(Positionp5)<1){Positionp5<-which(bareSDs[,13]==max(bareSDs[,13]))} +# Positionp6<-which(goodones[,14] %in% TRUE) +# if (length(Positionp6)<1){Positionp6<-which(bareSDs[,14]==max(bareSDs[,14]))} +# Positionp7<-which(goodones[,15] %in% TRUE) +# if (length(Positionp7)<1){Positionp7<-which(bareSDs[,15]==max(bareSDs[,15]))} + +# Positionm7<-which(goodones[,1] %in% TRUE) +# if (length(Positionm7)<3){Positionm7<-bareSDs[,1][order(bareSDs[,1])[1:3]]} +# Positionm6<-which(goodones[,2] %in% TRUE) +# if (length(Positionm6)<3){Positionm6<-bareSDs[,2][order(bareSDs[,2])[1:3]]} +# Positionm5<-which(goodones[,3] %in% TRUE) +# if (length(Positionm5)<3){Positionm5<-bareSDs[,3][order(bareSDs[,3])[1:3]]} +# Positionm4<-which(goodones[,4] %in% TRUE) +# if (length(Positionm4)<3){Positionm4<-bareSDs[,4][order(bareSDs[,4])[1:3]]} +# Positionm3<-which(goodones[,5] %in% TRUE) +# if (length(Positionm3)<3){Positionm3<-bareSDs[,5][order(bareSDs[,5])[1:3]]} +# Positionm2<-which(goodones[,6] %in% TRUE) +# if (length(Positionm2)<3){Positionm2<-bareSDs[,6][order(bareSDs[,6])[1:3]]} +# Positionm1<-which(goodones[,7] %in% TRUE) +# if (length(Positionm1)<3){Positionm1<-bareSDs[,7][order(bareSDs[,7])[1:3]]} +# +# Positiond0<-which(goodones[,8] %in% TRUE) +# #if (length(Positiond0)<3){Positiond0<-bareSDs[,8][order(bareSDs[,8])[1:3]]} +# +# Positionp1<-which(goodones[,9] %in% TRUE) +# if (length(Positionp1)<3){Positionp1<-bareSDs[,9][order(bareSDs[,9])[1:3]]} +# Positionp2<-which(goodones[,10] %in% TRUE) +# if (length(Positionp2)<3){Positionp2<-bareSDs[,10][order(bareSDs[,10])[1:3]]} +# Positionp3<-which(goodones[,11] %in% TRUE) +# if (length(Positionp3)<3){Positionp3<-bareSDs[,11][order(bareSDs[,11])[1:3]]} +# Positionp4<-which(goodones[,12] %in% TRUE) +# if (length(Positionp4)<3){Positionp4<-bareSDs[,12][order(bareSDs[,12])[1:3]]} +# Positionp5<-which(goodones[,13] %in% TRUE) +# if 
(length(Positionp5)<3){Positionp5<-bareSDs[,13][order(bareSDs[,13])[1:3]]} +# Positionp6<-which(goodones[,14] %in% TRUE) +# if (length(Positionp6)<3){Positionp6<-bareSDs[,14][order(bareSDs[,14])[1:3]]} +# Positionp7<-which(goodones[,15] %in% TRUE) +# if (length(Positionp7)<3){Positionp7<-bareSDs[,15][order(bareSDs[,15])[1:3]]} + + aa_props2 <- c("1"="A", "2"="C", "3"="D", "4"="E", "5"="F", "6"="G", "7"="H", "8"="I", "9"="K", "10"="L", "11"="M", "12"="N", "13"="P", "14"="Q", "15"="R", "16"="S", "17"="T", "18"="V", "19"="W", "20"="Y") @@ -1707,68 +1774,81 @@ AblThresh<-as.numeric(Abl[24,1]) AblTrueThresh<-((AblThresh*AblNorm)/(100-AblThresh)) AblActive<-unlist(AblGeneratedScores)>AblTrueThresh +if (TodaysKinase=="ABL"){AblActive<-rep(0,times=nrow(GeneratedPeptides))} ArgNorm<-1/as.numeric(Arg[22,1]) ArgThresh<-as.numeric(Arg[24,1]) ArgTrueThresh<-((ArgThresh*ArgNorm)/(100-ArgThresh)) ArgActive<-unlist(ArgGeneratedScores)>ArgTrueThresh +if (TodaysKinase=="ARG"){ArgActive<-rep(0,times=nrow(GeneratedPeptides))} BtkNorm<-1/as.numeric(Btk[22,1]) BtkThresh<-as.numeric(Btk[24,1]) BtkTrueThresh<-((BtkThresh*BtkNorm)/(100-BtkThresh)) BtkActive<-unlist(BtkGeneratedScores)>BtkTrueThresh +if (TodaysKinase=="BTK"){BtkActive<-rep(0,times=nrow(GeneratedPeptides))} CskNorm<-1/as.numeric(Csk[22,1]) CskThresh<-as.numeric(Csk[24,1]) CskTrueThresh<-((CskThresh*CskNorm)/(100-CskThresh)) CskActive<-(CskGeneratedScores)>CskTrueThresh +if (TodaysKinase=="CSK"){CskActive<-rep(0,times=nrow(GeneratedPeptides))} FynNorm<-1/as.numeric(Fyn[22,1]) FynThresh<-as.numeric(Fyn[24,1]) FynTrueThresh<-((FynThresh*FynNorm)/(100-FynThresh)) FynActive<-unlist(FynGeneratedScores)>FynTrueThresh +if (TodaysKinase=="FYN"){FynActive<-rep(0,times=nrow(GeneratedPeptides))} HckNorm<-1/as.numeric(Hck[22,1]) HckThresh<-as.numeric(Hck[24,1]) HckTrueThresh<-((HckThresh*HckNorm)/(100-HckThresh)) HckActive<-unlist(HckGeneratedScores)>HckTrueThresh +if (TodaysKinase=="HCK"){HckActive<-rep(0,times=nrow(GeneratedPeptides))} 
JAK2Norm<-1/as.numeric(JAK2[22,1]) JAK2Thresh<-as.numeric(JAK2[24,1]) JAK2TrueThresh<-((JAK2Thresh*JAK2Norm)/(100-JAK2Thresh)) JAk2Active<-unlist(JAK2GeneratedScores)>JAK2TrueThresh +if (TodaysKinase=="JAK2"){JAk2Active<-rep(0,times=nrow(GeneratedPeptides))} LckNorm<-1/as.numeric(Lck[22,1]) LckThresh<-as.numeric(Lck[24,1]) LckTrueThresh<-((LckThresh*LckNorm)/(100-LckThresh)) LckActive<-unlist(LckGeneratedScores)>LckTrueThresh +if (TodaysKinase=="LCK"){LckActive<-rep(0,times=nrow(GeneratedPeptides))} LynNorm<-1/as.numeric(Lyn[22,1]) LynThresh<-as.numeric(Lyn[24,1]) LynTrueThresh<-((LynThresh*LynNorm)/(100-LynThresh)) LynActive<-unlist(LynGeneratedScores)>LynTrueThresh +if (TodaysKinase=="LYN"){LynActive<-rep(0,times=nrow(GeneratedPeptides))} Pyk2Norm<-1/as.numeric(Pyk2[22,1]) Pyk2Thresh<-as.numeric(Pyk2[24,1]) Pyk2TrueThresh<-((Pyk2Thresh*Pyk2Norm)/(100-Pyk2Thresh)) Pyk2Active<-unlist(Pyk2GeneratedScores)>Pyk2TrueThresh +if (TodaysKinase=="PYK2"){Pyk2Active<-rep(0,times=nrow(GeneratedPeptides))} SrcNorm<-1/as.numeric(Src[22,1]) SrcThresh<-as.numeric(Src[24,1]) SrcTrueThresh<-((SrcThresh*SrcNorm)/(100-SrcThresh)) SrcActive<-unlist(SrcGeneratedScores)>SrcTrueThresh +if (TodaysKinase=="SRC"){SrcActive<-rep(0,times=nrow(GeneratedPeptides))} SykNorm<-1/as.numeric(Syk[22,1]) SykThresh<-as.numeric(Syk[24,1]) SykTrueThresh<-((SykThresh*SykNorm)/(100-SykThresh)) SykActive<-unlist(SykGeneratedScores)>SykTrueThresh +if (TodaysKinase=="SYK"){SykActive<-rep(0,times=nrow(GeneratedPeptides))} YesNorm<-1/as.numeric(Yes[22,1]) YesThresh<-as.numeric(Yes[24,1]) YesTrueThresh<-((YesThresh*YesNorm)/(100-YesThresh)) YesActive<-unlist(YesGeneratedScores)>YesTrueThresh +if (TodaysKinase=="YES"){YesActive<-rep(0,times=nrow(GeneratedPeptides))} -AllActive<-AblActive+ArgActive+BtkActive+CskActive+FynActive+HckActive+JAk2Active+LckActive+LynActive+Pyk2Active+SrcActive+SykActive+YesActive 
+AllActive<-AblActive+ArgActive+BtkActive+CskActive+FynActive+HckActive+JAk2Active+LckActive+LynActive+Pyk2Active+SrcActive+SykActive+YesActive+FLT3Active #Btkactive+ Scores<-ThisKinGeneratedScores @@ -1877,23 +1957,24 @@ #create the MCC table -threshold<-c(1:100) -threshold<-order(threshold,decreasing = TRUE) +threshold<-c(1:100,(1:9)/10,(1:9)/100,0,-.1) +threshold<-threshold[order(threshold,decreasing = TRUE)] +threshold -Truepositives<-c(1:100) -Falsenegatives<-c(1:100) -Sensitivity<-c(1:100) -TrueNegatives<-c(1:100) -FalsePositives<-c(1:100) -Specificity<-c(1:100) -Accuracy<-c(1:100) -MCC<-c(1:100) -EER<-c(1:100) +Truepositives<-c(1:120) +Falsenegatives<-c(1:120) +Sensitivity<-c(1:120) +TrueNegatives<-c(1:120) +FalsePositives<-c(1:120) +Specificity<-c(1:120) +Accuracy<-c(1:120) +MCC<-c(1:120) +EER<-c(1:120) #MAKE DAMN SURE THAT THE ACCESSION NUMBERS FOLLOW THE MOTIFS -for (z in 1:100) { - thres<-101-z +for (z in 1:120) { + thres<-threshold[z] Truepositives[z]<-length(PositiveWeirdScores[PositiveWeirdScores>=(thres)]) Falsenegatives[z]<-nrow(positivesubstrates)-Truepositives[z] Sensitivity[z]<-Truepositives[z]/(Falsenegatives[z]+Truepositives[z]) @@ -1902,10 +1983,10 @@ FalsePositives[z]<-nrow(NegativeSubstrateList)-TrueNegatives[z] Specificity[z]<-1-(TrueNegatives[z]/(FalsePositives[z]+TrueNegatives[z])) Accuracy[z]<-100*(Truepositives[z]+TrueNegatives[z])/(Falsenegatives[z]+FalsePositives[z]+TrueNegatives[z]+Truepositives[z]) - MCC[z]<-((Truepositives[z]+TrueNegatives[z])-(Falsenegatives[z]+FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z]))) + MCC[z]<-((Truepositives[z]*TrueNegatives[z])-(Falsenegatives[z]*FalsePositives[z]))/sqrt(round(round(Truepositives[z]+Falsenegatives[z])*round(TrueNegatives[z]+FalsePositives[z])*round(Truepositives[z]+FalsePositives[z])*round(TrueNegatives[z]+Falsenegatives[z]))) 
EER[z]<-.01*(((1-(Sensitivity[z]))*(Truepositives[z]+Falsenegatives[z]))+(Specificity[z]*(1-(Truepositives[z]+Falsenegatives[z])))) } -Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,Accuracy,MCC,EER) +Characterization<-cbind.data.frame(threshold,Truepositives,Falsenegatives,Sensitivity,TrueNegatives,FalsePositives,Specificity,MCC,EER) positiveheader<-c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,"RPMS","PMS") positivewithscores<-rbind.data.frame(positiveheader,positivewithscores) @@ -1924,4 +2005,5 @@ # header<-colnames(RanksPeptides) # RanksPeptides<-rbind.data.frame(header,RanksPeptides) +write.table(x="Off Target Kinase activity (your kinase of interest should have zeros here because it is ON-target)",file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",") -write.table(RanksPeptides,file = FILENAME3,append = FALSE,row.names = FALSE,col.names = TRUE,sep = ",") +write.table(RanksPeptides,file = FILENAME3,append = TRUE,row.names = FALSE,col.names = TRUE,sep = ",")
--- a/kinatestid_r/kinatestid_r.xml Tue Feb 27 15:24:12 2018 -0500 +++ b/kinatestid_r/kinatestid_r.xml Tue Feb 27 16:51:29 2018 -0500 @@ -8,7 +8,7 @@ ln -s '$substrates' input1 && ln -s '$negatives' input2 && ln -s '$SBF' input3 && - Rscript '$__tool_directory__/Kinatest-R.R' && + Rscript '$__tool_directory__/Kinatest-R.R' '$outGroup' && mv output1 output1.csv && mv output2 output2.csv && mv output3 output3.csv @@ -17,11 +17,12 @@ <param format="csv" name="substrates" type="data" label="Positive/Phosphorylated Substrate List"/> <param format="csv" name="negatives" type="data" label="Negative/unPhosphorylated Substrate List"/> <param format="csv" name="SBF" type="data" label="Substrate Background Frequency List"/> + <param name="outGroup" type="text" value="kinase" label="Kinase Name"/> </inputs> <outputs> - <data format="csv" name="SDtable" from_work_dir="output1.csv" label="Standard Deviation Table"/> - <data format="csv" name="EPM" from_work_dir="output2.csv" label="Endogenous Probability Matrix"/> - <data format="csv" name="Characterization" from_work_dir="output3.csv" label="Characterization Table"/> + <data format="csv" name="SDtable" from_work_dir="output1.csv" label="${outGroup}_Standard Deviation Table"/> + <data format="csv" name="EPM" from_work_dir="output2.csv" label="${outGroup}_Characterization Table"/> + <data format="csv" name="Characterization" from_work_dir="output3.csv" label="${outGroup}_Generated Sequences"/> </outputs> <tests> <test> @@ -36,6 +37,13 @@ <help><