annotate region_motif_lib/regions.r @ 0:19d2cffb8db3 draft

Initial upload
author jeremyjliu
date Wed, 06 Aug 2014 15:36:46 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
1
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
2 # SHOULD ONLY OCCUR IN ONE FILE
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
3 #common.dir = "/Users/jeremyliu1/galaxy-dist/tools/my_tools"
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
4
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
5 # commonDir from region_motif_intersect.r sourcing file
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
6 dyn.load(paste(commonDir, "/region_motif_lib/regions.so",sep=""))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
7
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
8 ##reg = matrix(cbind(from,to)) from<to
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
9 ##region[[chr]] = reg
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
10 ##pos = unique(integer())
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
11 ##poslist = list(chr,pos, optional(strand=c(-1,0,+1)))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
12
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
13 # USED
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
14 merge.reg <- function(...,sep=1) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
15 ##This function returns union of regs.
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
16 reg = rbind(...)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
17 x=.C("merge_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(sep))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
18 reg=matrix(x[[1]][1:(x[[2]]*2)],ncol=2,byrow=TRUE)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
19 reg = matrix(reg[which(reg[,2]>reg[,1]),],ncol=2)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
20 reg[which(reg==0)]=1
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
21 return(reg)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
22 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
23
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
24 merge.regions<-function(...,sep=1) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
25 ##This function returns union of regions.
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
26 regions=list(...)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
27 chrs = unique(unlist(lapply(regions,names),use.names=F))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
28 region = list()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
29 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
30 region[[chr]] = do.call("merge.reg",c(lapply(regions,function(i) i[[chr]]),sep=sep))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
31 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
32 return(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
33 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
34
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
35 plot.reg<-function(reg,xlim=NULL,y=NULL,vertical=FALSE,...) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
36 ##This function does not stack if reg is overlapping.
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
37 ##new plot is made unless y is specified.
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
38 if(nrow(reg)==0) return()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
39 if(is.null(xlim)) xlim=range(reg)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
40 if(is.null(y)) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
41 plot(xlim,c(0,1),type="n",axes=FALSE,xlab=" ",ylab=" ")
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
42 y=0.5
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
43 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
44 segments(reg[,1],y,reg[,2],...)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
45 if(vertical) abline(v=reg)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
46 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
47
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
48 distance.to.closest.reg.of.reg <- function(reg,reg2) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
49 ##for each element of reg, what is the closest distance to any element of reg2?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
50 reg2 = merge.reg(reg2)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
51 reg2 = c(-Inf,t(reg2),Inf)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
52 s=reg[,1]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
53 e=reg[,2]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
54 sbin = as.integer(cut(s,reg2))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
55 ebin = as.integer(cut(e,reg2))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
56 d = pmin(s-reg2[sbin], reg2[sbin+1]-s, e-reg2[ebin], reg2[ebin+1]-e)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
57 d[which(sbin!=ebin | sbin%%2==0)] = 0
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
58 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
59 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
60
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
61 # USED
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
62 distance.to.closest.reg.of.pos <- function(pos,reg) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
63 ##for each element of pos, what is the closest distance to any element of reg?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
64 reg = merge.reg(reg)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
65 reg = c(-Inf,t(reg),Inf)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
66 pbin = as.integer(cut(pos,reg))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
67 d = pmin(pos-reg[pbin], reg[pbin+1]-pos)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
68 d[which(pbin%%2==0)] = 0
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
69 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
70 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
71
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
72 distance.to.closest.pos.of.reg <- function(reg,pos,pos.strand=NULL,index.return=FALSE) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
73 ##for each element of reg, what is the closest distance to any element of pos?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
74 ##if strand is provided, distance is along strand
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
75 o = order(pos)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
76 pos = c(-Inf,pos[o],Inf)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
77 o = c(o[1],o,o[length(o)])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
78
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
79 s=reg[,1]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
80 e=reg[,2]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
81 sbin = as.integer(cut(s,pos))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
82 ebin = as.integer(cut(e,pos))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
83
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
84 d=integer(nrow(reg))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
85 s.is.closer = s-pos[sbin] < pos[sbin+1]-e
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
86 if(index.return) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
87 return(ifelse(s.is.closer,o[sbin],o[sbin+1]))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
88 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
89 d = ifelse(s.is.closer, s-pos[sbin], e-pos[sbin+1])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
90 d[which(sbin!=ebin)] = 0
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
91 if(!is.null(pos.strand)) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
92 reg.strand = ifelse(s.is.closer,pos.strand[o][sbin],pos.strand[o][sbin+1])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
93 d = d * reg.strand
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
94 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
95 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
96 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
97
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
98 if(F) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
99 pos = sample(seq(0,1000,200))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
100 pos2 = sample(seq(10,1010,100))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
101 pos.strand = sample(c(1,-1),6,replace = T)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
102 pos2.strand = sample(c(1,-1),11,replace = T)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
103 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
104
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
105 distance.to.closest.pos.of.pos <- function(pos,pos2,pos.strand=NULL,pos2.strand=NULL, ignore.pos.strand=TRUE,index.return=FALSE) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
106 ##for each element of pos, what is the closest distance to any element of pos2?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
107 ##if index.return==TRUE, index of pos2 closest to pos is returned
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
108 ##else if strand2 is provided, distance is along strand2
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
109 ##if strand and strand2 are both provided and !ignore.pos.strand
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
110 ## then output is a list giving plus.up, plus.down, minus.up, minus.down
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
111 ## plus.up: distance to closest upstream on the same same strand etc. etc.
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
112 o = order(pos2)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
113 pos2 = c(-Inf,pos2[o],Inf)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
114 if(!is.null(pos2.strand)) pos2.strand = c(-Inf,pos2.strand[o],Inf)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
115
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
116 if(is.null(pos2.strand) | is.null(pos.strand) | ignore.pos.strand) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
117 pbin = as.integer(cut(pos,pos2))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
118
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
119 pbin = ifelse(pos-pos2[pbin] < pos2[pbin+1]-pos,pbin,pbin+1)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
120 d = pos-pos2[pbin]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
121 if(!is.null(pos2.strand)) d = d * pos2.strand[pbin]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
122
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
123 if(index.return) return(o[pbin-1])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
124 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
125 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
126 strands = list(plus=1,minus=-1)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
127 relcoords = list(up=0,down=1)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
128 ind = lapply(strands,function(strand) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
129 ind.p = c(1,which(pos2.strand==strand),length(pos2))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
130 pbin.p = cut(pos,pos2[ind.p],labels=FALSE)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
131 as.data.frame(lapply(relcoords,function(i) ind.p[pbin.p+i]))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
132 })
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
133 ind.temp = ind
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
134 ind.minus = which(pos.strand==-1)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
135 if(length(ind.minus)>0) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
136 ind[[1]][ind.minus,]=ind.temp[[2]][ind.minus,2:1]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
137 ind[[2]][ind.minus,]=ind.temp[[1]][ind.minus,2:1]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
138 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
139 ind = unlist(ind,recursive=FALSE)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
140 if(index.return) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
141 return( lapply(ind,function(i) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
142 i[which(i==1)]=NA
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
143 i[which(i==length(pos2))]=NA
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
144 o[i-1]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
145 }) )
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
146 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
147 return(lapply(ind,function(i) pos.strand*(pos2[i]-pos)))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
148 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
149
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
150 distance.to.closest.region.of.region <- function(region,region2) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
151 ##for each element of region[[chr]], what is the closest distance to any element of region2[[chr]]?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
152 ##returns d[[chr]]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
153 chrs = names(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
154 d=list()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
155 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
156 if(is.null(region2[[chr]])) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
157 d[[chr]] = rep(Inf,nrow(region[[chr]]))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
158 } else {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
159 d[[chr]] = distance.to.closest.reg.of.reg(region[[chr]],region2[[chr]])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
160 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
161 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
162 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
163 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
164
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
165 # USED
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
166 distance.to.closest.region.of.poslist <- function(poslist,region) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
167 ##for each element of poslist, what is the closest distance to any element of region?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
168 chrs = names(table(poslist$chr))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
169 d=integer()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
170 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
171 ind = which(poslist$chr==chr)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
172 pos=poslist$pos[ind]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
173 if(is.null(region[[chr]])) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
174 d[ind] = Inf
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
175 } else {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
176 d[ind] = distance.to.closest.reg.of.pos(pos,region[[chr]])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
177 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
178 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
179 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
180 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
181 distance.to.closest.poslist.of.region <- function(region,poslist,index.return=FALSE) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
182 ##for each element of region, what is the closest distance to any element of poslist?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
183 chrs = names(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
184 d=list()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
185 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
186 ind = which(poslist$chr==chr)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
187 pos=poslist$pos[ind]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
188 pos.strand=poslist$strand[ind]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
189 d[[chr]] = distance.to.closest.pos.of.reg(region[[chr]],pos,pos.strand,index.return=index.return)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
190 if(index.return) d[[chr]] = ind[d[[chr]]]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
191 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
192 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
193 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
194
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
195 distance.to.closest.poslist.of.poslist <- function(poslist,poslist2,ignore.poslist.strand=TRUE,index.return=FALSE) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
196 ##for each element of poslist, what is the closest distance to any element of poslist2?
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
197 ##if poslist2$strand is provided, distance is along strand2
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
198 ##if strand and strand2 are provided and no ignore.poslist.strand
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
199 ## then output is a list giving plus.up, plus.down, minus.up, minus.down
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
200 ## plus.up: distance to closest upstream on the same same strand etc. etc.
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
201 ##if index.return==TRUE, index of pos2 closest to pos is returned
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
202
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
203 chrs = names(table(poslist$chr))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
204
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
205 d=integer()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
206 stranded = !(is.null(poslist2$strand) | is.null(poslist$strand) | ignore.poslist.strand)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
207 if(stranded) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
208 brs = c("plus.up","plus.down","minus.up","minus.down")
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
209 d=list()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
210 for(br in brs) d[[br]]=integer()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
211 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
212
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
213 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
214 ind = which(poslist$chr==chr)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
215 ind2 = which(poslist2$chr==chr)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
216 pos=poslist$pos[ind]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
217 pos2=poslist2$pos[ind2]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
218 pos.strand=poslist$strand[ind]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
219 pos2.strand=poslist2$strand[ind2]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
220 if(!stranded) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
221 d[ind] = distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand,index.return=index.return)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
222 if(index.return) d[ind] = ind2[d[ind]]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
223 } else {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
224 x = distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
225 for(br in brs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
226 d[[br]][ind] = x[[br]]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
227 if(index.return) d[[br]][ind] = ind2[d[[br]][ind]]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
228 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
229 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
230 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
231 return(d)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
232 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
233
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
234
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
235 reg.minus.reg <- function(reg,reg2) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
236 x = .C("region_minus_region",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
237 x=x[which(x>=0)]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
238 return(matrix(x,ncol=2,byrow=TRUE))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
239 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
240
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
241 intersection.of.regs <- function(reg,reg2) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
242 x = .C("intersection_of_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
243 x=x[which(x>=0)]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
244 return(matrix(x,ncol=2,byrow=TRUE))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
245 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
246
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
247 region.minus.region<-function(region,region2) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
248 chrs = names(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
249 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
250 if(is.null(region[[chr]])) next
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
251 if(!is.null(region2[[chr]])) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
252 region[[chr]] = reg.minus.reg(region[[chr]],region2[[chr]])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
253 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
254 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
255 return(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
256 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
257
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
258 intersection.of.regions<-function(region,region2) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
259 chrs = names(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
260 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
261 if(is.null(region2[[chr]])) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
262 region[[chr]]<-NULL
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
263 } else {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
264 region[[chr]] = intersection.of.regs(region[[chr]],region2[[chr]])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
265 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
266 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
267 return(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
268 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
269
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
270 reg.around.pos <-function(pos,range=500,strand=NULL) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
271 if(length(range)==1) range=c(range,range)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
272 if(is.null(strand)) strand = 1;
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
273 reg = cbind(pos-range[1]*strand,pos+range[2]*strand);
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
274 ind = which(reg[,2]<reg[,1])
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
275 reg[ind,] = reg[ind,2:1]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
276 ind = which(reg<=0)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
277 reg[ind] = 1
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
278 return(reg)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
279 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
280
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
281
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
282 region.around.poslist <-function(poslist,range=500) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
283 chrs = names(table(poslist$chr))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
284 region=list()
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
285 for(chr in chrs) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
286 ind = which(poslist$chr==chr)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
287 pos=poslist$pos[ind]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
288 strand = 1
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
289 if(!is.null(poslist$strand)) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
290 strand = poslist$strand[ind]
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
291 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
292 region[[chr]] = reg.around.pos(pos,range,strand)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
293 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
294 return(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
295 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
296
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
297
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
298 poslist.of.region.centers <-function(region) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
299 chrs = names(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
300 n=sapply(region,nrow)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
301 return(data.frame(chr=rep(chrs,n),pos=unlist(lapply(region,function(chr)(chr[,1]+chr[,2])/2),use.names = FALSE)))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
302 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
303
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
304 write.gff.region<-function(region,outfname) {
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
305 region = lapply(region,function(chr) list(s=chr[,1],e=chr[,2]))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
306 out=unlist.chr(region)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
307 out$chr=rep(names(region),sapply(region,function(i) length(i$s)))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
308 empty=rep(".",length(out$chr))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
309 write.table(data.frame(out$chr,empty,empty,out$s,out$e,empty,empty,empty,empty),quote=FALSE,sep="\t",file=outfname,col.names=FALSE,row.names=FALSE)
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
310 }
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
311
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
312 number.of.regions = function(region)sum(sapply(region,nrow))
19d2cffb8db3 Initial upload
jeremyjliu
parents:
diff changeset
313 size.of.regions = function(region) sum(sapply(merge.regions(region),function(reg) sum(reg[,2]-reg[,1])))