annotate offtarget_v7.R @ 0:b73bf7e5bb41 draft default tip

Uploaded
author mkim8
date Fri, 14 Mar 2014 14:31:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
1 library(methods)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
2 library(lattice)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
3 suppressMessages(library(Matrix))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
4 suppressMessages(library(ClassComparison))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
5 suppressMessages(library(glmnet))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
6 options(warn=-1)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
7
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
8 #######################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
9 ### arguments input ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
10 #######################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
11
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
12 arg=commandArgs(TRUE)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
13
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
14 InputFile=arg[1] # must be given by user including both
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
15 # response variable and siRNA sequences
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
16
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
17 Seed=as.integer(arg[2]) # Name: "Seed"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
18 # specify the seed region to be used in analysis
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
19 # an interger out of 1~14.
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
20 # default is 2, which means 2~7 hexamer is used
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
21
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
22 Strand=arg[3] # Name:"Strand Orientation for Analysis"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
23 # specify which strand is used in analysis
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
24 # must be given by user, options are "sense",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
25 #"antisense" or "both"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
26
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
27 Lambda=as.numeric(arg[4]) # Name:"Lambda Value"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
28 # penalty parameter in LASSO regression
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
29 # default = 0.001, which can be changed by user
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
30 # in order to perform differennt LASSO estimation
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
31
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
32 Library=arg[5] # Name:"siRNA Library to use"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
33 # user has to specify which siRNA library will be
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
34 # used in analysis can be "Custom", "Ambion", "New Dharmacon"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
35 # or "Old Dharmacon". Default is "Custom"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
36
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
37 ### new parameter ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
38
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
39 Str=as.numeric(arg[6]) # Name:"Strength of seed-linked effect“
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
40 # specify cutoff for strenth of seed-linked effet, must be positive
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
41 # default is 1
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
42
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
43 Sig=as.numeric(arg[7]) # Name:"Significance (P value)"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
44 # specify cutoff for significance (p value)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
45 # default is 0.01
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
46
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
47 #################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
48 ### functions ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
49 #################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
50
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
51 # uppercase a sequence and transform T to U ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
52
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
53 TtoU=function(x){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
54 x=as.character(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
55 x=toupper(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
56 x=chartr("T", "U", x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
57 return(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
58 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
59
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
60 # reverse a sequence #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
61
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
62 StrReverse <- function(x){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
63 x=as.character(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
64 x=sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
65 return(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
66 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
67
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
68 # generate complementary sequence #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
69
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
70 DuplexReverse=function(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
71 {
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
72 x=as.character(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
73 x=gsub("U","T",x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
74 x=gsub("A","U",x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
75 x=gsub("T","A",x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
76 x=gsub("G","g",x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
77 x=gsub("C","G",x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
78 x=gsub("g","C",x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
79 x=StrReverse(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
80 return(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
81 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
82
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
83 # check if input contains AUGC only #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
84
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
85 SeqCheck=function(x){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
86 x=as.character(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
87 x=strsplit(x, "")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
88 x=unlist(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
89 if(sum(x %in% c("A", "U", "G", "C")) == length(x) ){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
90 return(TRUE)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
91 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
92 else{
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
93 return(FALSE)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
94 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
95 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
96
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
97 # extract seed sequence from siRNA #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
98
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
99 SeedExtract=function(x, start=2, end=7){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
100 x=as.character(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
101 x=substr(x, start, end)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
102 return(x)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
103 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
104
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
105 # examine if a pool contain a seed #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
106
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
107 SeedExist=function(dfr, s){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
108 dfr=data.frame(dfr)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
109 s=as.character(s)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
110 return(as.numeric(apply(dfr == s, 1, sum)))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
111 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
112
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
113 # seed analysis #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
114
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
115 SeedAnalysis=function(Z, data, start=2, end=7){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
116
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
117 print("Generating seed.dfr ... ... ")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
118 data=data.frame(data)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
119 seed.dfr=data
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
120 for(i in 1:ncol(seed.dfr)){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
121 seed.dfr[, i]=as.character(SeedExtract(seed.dfr[, i], start=start, end=end))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
122 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
123 colnames(seed.dfr)=paste("seed", 1:ncol(seed.dfr), sep=".")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
124 print("Generating seed.table ... ... ")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
125 seed.table=data.frame(table(unlist(seed.dfr)))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
126 names(seed.table)=c("seed", "family.size")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
127
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
128 seed.table$ks.p.value=rep(NA, length(seed.table$seed))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
129 y=Z
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
130 for(i in 1:length(seed.table$seed)){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
131 s=SeedExist(seed.dfr, seed.table$seed[i])
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
132 seed.table$ks.p.value[i]=
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
133 ks.test(y[s > 0], y[s== 0])$p.value
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
134 print(paste("seed ", i))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
135 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
136
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
137 return(list(Z=Z, data=data, seed.table=seed.table, seed.dfr=seed.dfr))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
138 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
139
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
140 # offtarget analysis #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
141
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
142 OfftargetAnalysis=function(object, s=0.001){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
143
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
144 seed.dfr=object$seed.dfr
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
145 seed.table=object$seed.table
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
146 y=object$Z
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
147
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
148 print("Generating Matrix X ... ... ")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
149 s.sub=as.character(seed.table$seed)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
150 x=matrix(0, length(y), length(s.sub))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
151 colnames(x)=as.character(s.sub)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
152
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
153 for(i in 1:length(s.sub)){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
154 n=as.character(s.sub[i])
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
155 x[, n]=SeedExist(seed.dfr, n)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
156 print(paste("Matrix X seed ", i))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
157 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
158
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
159 print("Fitting ... ... ")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
160 gc()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
161
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
162 fit=glmnet(x, y)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
163
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
164 print("offtarget analysis")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
165 offtarget=data.frame(as.matrix(coef(fit, s=s)))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
166 offtarget$seed=rownames(offtarget)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
167 offtarget=offtarget[, c(2, 1)]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
168 names(offtarget)[2]="coef"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
169 rownames(offtarget)=NULL
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
170
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
171 offtarget=merge(offtarget, seed.table, by="seed", all.x=T)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
172 y.fit=as.numeric(predict(fit, x, s=s))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
173 y.corrected=y-y.fit
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
174 print("Off-target analysis Done!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
175
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
176 return(list(fit=fit, X=x, offtarget=offtarget, Z.corrected=y.corrected))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
177
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
178
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
179 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
180
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
181 # generating data frame of siRNA sequence #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
182
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
183 SeqGenerate=function(SeqDat){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
184 SenseSeq=data.frame(apply(SeqDat, 2, TtoU))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
185 AntiSeq=data.frame(apply(SenseSeq, 2, DuplexReverse))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
186 for(i in 1:ncol(SenseSeq)){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
187 AntiSeq[, i]=DuplexReverse(SenseSeq[, i])
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
188 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
189 BothSeq=cbind(SenseSeq, AntiSeq)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
190 names(BothSeq)=paste("duplex", 1:ncol(BothSeq), sep=".")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
191 output=list(SenseSeq=SenseSeq, AntiSeq=AntiSeq, BothSeq=BothSeq)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
192 return(output)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
193 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
194
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
195 # DataAnalysis #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
196
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
197 DataAnalysis=function(Z, Seq, SeedStart, Strand, Lambda){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
198
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
199 SeedEnd=SeedStart+5
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
200
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
201 analysis=list()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
202
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
203 if(Strand == 'sense'){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
204 Seq=Seq$SenseSeq
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
205 analysis$seed.analysis=SeedAnalysis(Z=Z, data=Seq, start=SeedStart, end=SeedEnd)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
206 analysis$offtarget.analysis=OfftargetAnalysis(analysis$seed.analysis, s=Lambda)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
207 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
208 else if(Strand == "antisense"){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
209 Seq=Seq$AntiSeq
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
210 analysis$seed.analysis=SeedAnalysis(Z=Z, data=Seq, start=SeedStart, end=SeedEnd)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
211 analysis$offtarget.analysis=OfftargetAnalysis(analysis$seed.analysis, s=Lambda)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
212 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
213 else if(Strand == 'both'){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
214 Seq=Seq$BothSeq
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
215 analysis$seed.analysis=SeedAnalysis(Z=Z, data=Seq, start=SeedStart, end=SeedEnd)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
216 analysis$offtarget.analysis=OfftargetAnalysis(analysis$seed.analysis, s=Lambda)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
217 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
218 return(analysis)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
219 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
220
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
221 # new functions #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
222
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
223 # FDR calculation #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
224
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
225 fdr.bum=function(p.value, Sig){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
226 p.sum=attributes(summary(Bum(p.value), tau=Sig))$estimates
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
227 f=p.sum[4]/(p.sum[2]+p.sum[4])
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
228 f=as.numeric(f)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
229 return(f)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
230 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
231
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
232 # load miRNA database #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
233
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
234 load("/home/galaxy/galaxy-dist/tools/offtarget/miRNA.RData")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
235
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
236 ##################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
237 ### data input ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
238 ##################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
239
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
240 # data input #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
241
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
242 SeedStart=Seed
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
243 inputfile=read.csv(InputFile, header=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
244 id=inputfile[, 1]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
245 Z=as.numeric(inputfile[, 2])
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
246
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
247 # Z score checking #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
248
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
249 if(sum(is.na(Z)) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
250 print("Missing Value in Experimental Readouts!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
251 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
252 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
253
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
254 if(sum(Z == "") > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
255 print("Missing Value in Experimental Readouts!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
256 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
257 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
258
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
259 #####################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
260 ### data analysis ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
261 #####################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
262
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
263 ### Custom ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
264
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
265 if(Library == "custom"){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
266
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
267 ### check if user selected wrong "Library" parameter ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
268 if(ncol(inputfile) < 3){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
269 print("siRNA sequences not available and please check your Library selection")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
270 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
271 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
272 ##################################################################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
273
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
274 SeqDat=inputfile[, 3:ncol(inputfile)]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
275 SeqDat=data.frame(SeqDat)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
276
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
277 if(sum(is.na(unlist(SeqDat))) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
278 print("Missing Value in siRNA sequences!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
279 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
280 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
281
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
282 if(sum(unlist(SeqDat) == "") > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
283 print("Missing Value in siRNA sequences!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
284 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
285 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
286
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
287 Seq=SeqGenerate(SeqDat)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
288
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
289 SenseSeq=Seq$SenseSeq
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
290 Check=data.frame(apply(SenseSeq, 1, SeqCheck))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
291
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
292 if(sum(Check == FALSE) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
293 print("Non-Sequence Value in siRNA sequences!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
294 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
295 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
296 else{
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
297 analysis=DataAnalysis(Z, Seq, SeedStart, Strand, Lambda)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
298 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
299 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
300
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
301 ### Ambion ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
302
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
303 if(Library == "ambion"){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
304
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
305 id=as.integer(id)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
306
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
307 if(sum(is.na(id)) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
308 print("Missing Value in Gene ID!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
309 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
310 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
311
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
312 if(sum(id == "") > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
313 print("Missing Value in Gene ID!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
314 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
315 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
316
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
317 load("/home/galaxy/galaxy-dist/tools/offtarget/Ambion.RData")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
318 id.ambion=unique(ambion$id)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
319
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
320 if(length(id[!(id %in% id.ambion)]) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
321 write.csv(c("No Matching siRNA Sequences with Below Gene ID and Removed from Analysis",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
322 id[!(id %in% id.ambion)]), "unmatched.id.csv")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
323 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
324
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
325 Dat=data.frame(id=id, z=Z)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
326 Dat=merge(Dat, ambion, by="id", sort=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
327 Z=Dat$z
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
328 id=Dat$id
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
329 SeqDat=Dat[, 6:8]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
330 Seq=SeqGenerate(SeqDat)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
331 analysis=DataAnalysis(Z, Seq, SeedStart, Strand, Lambda)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
332
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
333 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
334
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
335 ### New Dhar ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
336
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
337 if(Library == "new_dharmacon"){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
338
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
339 id=as.integer(id)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
340
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
341 if(sum(is.na(id)) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
342 print("Missing Value in Gene ID!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
343 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
344 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
345
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
346 if(sum(id == "") > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
347 print("Missing Value in Gene ID!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
348 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
349 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
350
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
351 load("/home/galaxy/galaxy-dist/tools/offtarget/New_Dhar.RData")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
352 id.new.dhar=unique(new.dhar$id)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
353
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
354 if(length(id[!(id %in% id.new.dhar)]) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
355 write.csv(c("No Matching siRNA Sequences with Below Gene ID and Removed from Analysis",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
356 id[!(id %in% id.new.dhar)]), "unmatched.id.csv")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
357 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
358
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
359 Dat=data.frame(id=id, z=Z)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
360 Dat=merge(Dat, new.dhar, by="id", sort=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
361 Z=Dat$z
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
362 id=Dat$id
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
363 SeqDat=Dat[, 7:10]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
364 Seq=SeqGenerate(SeqDat)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
365 analysis=DataAnalysis(Z, Seq, SeedStart, Strand, Lambda)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
366
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
367 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
368
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
369 ### Old Dhar ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
370
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
371 if(Library == "old_dharmacon"){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
372
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
373 id=as.integer(id)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
374
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
375 if(sum(is.na(id)) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
376 print("Missing Value in Gene ID!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
377 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
378 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
379
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
380 if(sum(id == "") > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
381 print("Missing Value in Gene ID!")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
382 quit()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
383 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
384
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
385 load("/home/galaxy/galaxy-dist/tools/offtarget/Old_Dhar.RData")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
386 id.old.dhar=unique(old.dhar$id)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
387
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
388 if(length(id[!(id %in% id.old.dhar)]) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
389 write.csv(c("No Matching siRNA Sequences with Below Gene ID and Removed from Analysis",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
390 id[!(id %in% id.old.dhar)]), "unmatched.id.csv")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
391 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
392
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
393 Dat=data.frame(id=id, z=Z)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
394 Dat=merge(Dat, old.dhar, by="id", sort=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
395 Z=Dat$z
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
396 id=Dat$id
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
397 SeqDat=Dat[, 7:10]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
398 Seq=SeqGenerate(SeqDat)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
399 analysis=DataAnalysis(Z, Seq, SeedStart, Strand, Lambda)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
400
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
401 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
402
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
403 ######################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
404 ### results output ###
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
405 ######################
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
406
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
407 # code chagned below #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
408
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
409 # generating output files #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
410
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
411 duplex.dfr=analysis$seed.analysis$data
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
412 names(duplex.dfr)=paste("duplex", 1:length(names(duplex.dfr)), sep=".")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
413 seed.dfr=analysis$seed.analysis$seed.dfr
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
414 offtarget=analysis$offtarget.analysis$offtarget
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
415 Z.corrected=analysis$offtarget.analysis$Z.corrected
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
416 output.corr=cbind(ID=id, Z.Score=Z, Corrected.Z.Score=Z.corrected,
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
417 duplex.dfr, seed.dfr)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
418
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
419
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
420 print("outputing results... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
421
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
422 if(Library == 'custom'){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
423
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
424 print("outputing corrected Z score... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
425 write.csv(output.corr, "CorrectedZScore.csv", row.names=F) ### export corrected z score
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
426
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
427 print("outputing summary... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
428 write.csv(offtarget, "Seed_Famliy_Summary.csv", row.names=FALSE) ### output seed family summary table
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
429
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
430 print("outputing offtarget seed families... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
431
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
432 off.target=offtarget$seed[offtarget$ks.p.value < Sig & (
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
433 offtarget$coef < -Str | offtarget$coef > Str)] # select off target by custom setup
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
434
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
435 write.csv(offtarget[offtarget$seed %in% off.target, ],
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
436 "Off-Target_Seed_Families.csv", row.names=FALSE) ### export off-target seed families
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
437
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
438 print("outputing miRNAs... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
439 if(nrow(miRNA[miRNA$Seed.sequence %in% off.target, ]) == 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
440 write.csv("No miRNA detected", "miRNA.csv", row.names=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
441 fe="No"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
442 } ### export miRNAs table sharing common seed
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
443
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
444 if(nrow(miRNA[miRNA$Seed.sequence %in% off.target, ]) != 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
445 write.csv(miRNA[miRNA$Seed.sequence %in% off.target, ], "miRNA.csv", row.names=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
446 fe="Yes"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
447 } ### export miRNAs talbe sharing common seed
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
448
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
449 if(length(off.target) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
450 for(i in 1:length(off.target)){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
451
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
452 n=off.target[i]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
453 si=SeedExist(seed.dfr, n)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
454 si.offtarget=output.corr[si == 1, -3]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
455 si.offtarget$off.target.seed=n
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
456 si.offtarget=si.offtarget[order(si.offtarget$Z.Score), ]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
457 print(paste(n, "siRNA pools ... ..."))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
458 write.csv(si.offtarget, paste("siRNAsPool_OffTargetSeedFamily_", n, ".csv", sep=""), row.names=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
459
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
460 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
461 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
462 }else{
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
463
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
464 print("outputing corrected Z score... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
465 write.csv(output.corr[, 1:3], "CorrectedZScore.csv", row.names=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
466
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
467 print("outputing summary... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
468 write.csv(offtarget, "Seed_Famliy_Summary.csv", row.names=FALSE) ### output seed family summary table
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
469
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
470 print("outputing offtarget seed families... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
471
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
472 off.target=offtarget$seed[offtarget$ks.p.value < Sig & (
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
473 offtarget$coef < -Str | offtarget$coef > Str)] # select off target by custom setup
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
474
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
475 write.csv(offtarget[offtarget$seed %in% off.target, ],
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
476 "Off-Target_Seed_Families.csv", row.names=FALSE) ### export off-target seed families
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
477
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
478 print("outputing miRNAs... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
479 if(nrow(miRNA[miRNA$Seed.sequence %in% off.target, ]) == 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
480 write.csv("No miRNA detected", "miRNA.csv", row.names=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
481 fe="No"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
482 } ### export miRNAs table sharing common seed
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
483
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
484 if(nrow(miRNA[miRNA$Seed.sequence %in% off.target, ]) != 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
485 write.csv(miRNA[miRNA$Seed.sequence %in% off.target, ], "miRNA.csv", row.names=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
486 fe="Yes"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
487 } ### export miRNAs talbe sharing common seed
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
488
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
489 if(length(off.target) > 0){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
490 for(i in 1:length(off.target)){
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
491
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
492 n=off.target[i]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
493 si=SeedExist(seed.dfr, n)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
494 si.offtarget=output.corr[si == 1, -3]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
495 si.offtarget$off.target.seed=n
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
496 si.offtarget=si.offtarget[order(si.offtarget$Z.Score), ]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
497 print(paste(n, "siRNA pools ... ..."))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
498 write.csv(si.offtarget, paste("siRNAsPool_OffTargetSeedFamily_", n, ".csv", sep=""), row.names=F)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
499
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
500 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
501 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
502 }
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
503
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
504
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
505 # generating output figures #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
506
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
507 print("outputing figures... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
508
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
509 jpeg(file="SeedFamilyOffTarget.jpeg", height=600, width=800)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
510 par(mar=c(5, 5, 2, 2))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
511 plot(offtarget$coef, -log10(offtarget$ks.p.value),
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
512 pch=20, col="blue", yaxt="n", cex.axis=1.5, cex.lab=2,
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
513 xlab="Strength of seed-linked effect", ylab="Significance (P value)")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
514 p.log=ceiling(-log10(min(offtarget$ks.p.value, na.rm=T))/2)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
515 p.range=10^(0:-p.log*2)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
516 axis(2, p.range,
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
517 at=-log(p.range, 10),
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
518 cex.axis=1.5)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
519 off.target=offtarget$seed[offtarget$coef < -Str & offtarget$ks.p.value < Sig]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
520 nn=length(off.target)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
521 points(offtarget$coef[offtarget$seed %in% off.target],
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
522 -log10(offtarget$ks.p.value[offtarget$seed %in% off.target]),
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
523 col="red", pch=20)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
524 off.target=offtarget$seed[offtarget$coef > Str & offtarget$ks.p.value < Sig]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
525 np=length(off.target)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
526 points(offtarget$coef[offtarget$seed %in% off.target],
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
527 -log10(offtarget$ks.p.value[offtarget$seed %in% off.target]),
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
528 col="green", pch=20)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
529 abline(v=-Str, col="red", lwd=3)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
530 abline(v=Str, col="green", lwd=3)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
531 abline(h=-log10(Sig), col="blue", lwd=3)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
532 dev.off()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
533
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
534 # generating legend #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
535
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
536 jpeg(file="SeedFamilyOffTargetLegend.jpeg", width=500, height=100)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
537 par(mar=c(0, 0, 0, 0))
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
538 plot(c(0, 2), c(0.8, 1.2), type="n", bty="n", yaxt="n", xaxt='n')
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
539 points(c(0, 0), c(1.1, 0.9), col=c('red', "green"), pch=20, cex=2)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
540 text(1, 1.1, paste('N: ', nn, " (Strength < ", -Str, ", P value < ", Sig, ")", sep=""), cex=2)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
541 text(1, 0.9, paste('N: ', np, " (Strength > ", Str, ", P value < ", Sig, ")", sep=""), cex=2)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
542 dev.off()
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
543
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
544 # new output #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
545
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
546 # generating summary #
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
547
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
548 print("Ansysis Summary ... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
549
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
550 print("p value ... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
551 p.value=offtarget$ks.p.value[offtarget$coef < -Str | offtarget$coef > Str]
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
552
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
553 print("FDR ... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
554 f=fdr.bum(p.value, Sig)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
555 print("rounding ... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
556 f=round(f, digits=3)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
557 print(f)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
558 print("FDR done ... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
559
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
560
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
561 print("off.sum ... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
562 off.sum=matrix(rep("", 30), 15, 2)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
563 off.sum[1,1]="Analysis is successfully done!"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
564 off.sum[3,1]="Parameters set up"
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
565 off.sum[4:15, 1]=c("Strand Orientation for Analysis:", "Lambda value:", "Seed:",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
566 "siRNA Library:", "Strength of seed-linked effect:", "P value:", "",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
567 "Analysis summary",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
568 "Number of negative off-target seed families:",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
569 "Number of positive off-target seed families:", "FDR:",
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
570 "Identify miRNA with phenotypical effects:")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
571 off.sum[4:15, 2]=c(Strand, Lambda, Seed, Library, Str, Sig, "", "", nn, np, f, fe)
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
572 write.table(off.sum, "Analysis_Summary.csv", row.names=F, col.names=F, sep=",")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
573
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
574 print("Analysis done ... ...")
b73bf7e5bb41 Uploaded
mkim8
parents:
diff changeset
575