changeset 1:baf68a0e7cf4 draft

Deleted selected files
author jeremyjliu
date Wed, 06 Aug 2014 15:37:03 -0400
parents 19d2cffb8db3
children 5d9014614d28
files region_motif_lib/regions.cpp region_motif_lib/regions.o region_motif_lib/regions.r region_motif_lib/regions.so
diffstat 4 files changed, 0 insertions(+), 436 deletions(-) [+]
line wrap: on
line diff
--- a/region_motif_lib/regions.cpp	Wed Aug 06 15:36:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <algorithm>
-using namespace std;
-
-extern "C"  {
-   typedef pair <int,int> Se_t;
-   bool se_lt (const Se_t &l,const Se_t &r) { return l.first < r.first; }
-   
-   void merge_regions(int *regions, int*nregionsR,int *merge_sepR) {
-      int nregs=nregionsR[0];
-      if(nregs==0) return;
-      int sep=merge_sepR[0];
-      if(sep<1) sep=1;
-      vector<Se_t> reg(nregs);
-      for(int ireg=0;ireg<nregs;ireg++) {
-	 reg[ireg]=make_pair(regions[ireg*2],regions[ireg*2+1]);
-      }
-      sort(reg.begin(),reg.end(),se_lt);
-      int *reg_index = new int[nregs];
-      for(int ireg=1;ireg<nregs;ireg++) reg_index[ireg]=-1;
-      reg_index[0]=0;
-      int last_ireg=0;
-      int counter=1;
-      for(int ireg=1;ireg<nregs;ireg++) {
-	 if(reg[ireg].first<=reg[last_ireg].second+sep) {
-	    if(reg[ireg].second>reg[last_ireg].second) reg[last_ireg].second=reg[ireg].second;
-	 } else {
-	    last_ireg=ireg;
-	    reg_index[counter]=ireg;
-	    counter++;
-	 }
-      }
-      for(int ireg=0;ireg<counter;ireg++) {
-	 regions[ireg*2] = reg[reg_index[ireg]].first;
-	 regions[ireg*2+1] = reg[reg_index[ireg]].second;
-      }
-      nregionsR[0]=counter;
-      delete [] reg_index;
-   }
-   void region_minus_region(int *regions, int*nregionsR,int *region2s, int*nregion2sR,int *updatedregions) {
-      int sep=1;
-      merge_regions(regions,nregionsR,&sep);
-      merge_regions(region2s,nregion2sR,&sep);
-      int nregs=nregionsR[0];
-      int nreg2s=nregion2sR[0];
-      for(int i=0;i<2*(nregs+nreg2s);i++) updatedregions[i]=-1;
-      if(nregs==0) return;
-      int ireg = 0;
-      int iregout = 0;
-      for(int ireg2=0; ireg2<nreg2s;ireg2++) {
-	 if(ireg==nregs) break;
-	 if(region2s[ireg2*2+1] < regions[2*ireg]) continue;
-	 if(region2s[ireg2*2] > regions[2*ireg+1]) {
-	    updatedregions[2*iregout] = regions[2*ireg];
-	    updatedregions[2*iregout+1] = regions[2*ireg+1];
-	    ireg++;
-	    ireg2--;
-	    iregout++;
-	    continue;
-	 }
-	 int s = regions[ireg*2];
-	 int e = regions[ireg*2+1];
-	 int s2 = region2s[ireg2*2];
-	 int e2 = region2s[ireg2*2+1];
-	 if(s2<=s && e2>=e) {
-	    ireg++;
-	    ireg2--;
-	 }
-	 else if(s2<=s) {
-	    regions[ireg*2] = e2+1;
-	    continue;
-	 } else if(e2>=e) {
-	    updatedregions[2*iregout] = s;
-	    updatedregions[2*iregout+1] = s2-1;
-	    ireg2--;
-	    iregout++;
-	    ireg++;
-	 } else {
-	    updatedregions[2*iregout] = s;
-	    updatedregions[2*iregout+1] = s2-1;
-	    regions[ireg*2] = e2+1;
-	    iregout++;
-	    ireg2--;
-	 }
-      }
-      while(ireg<nregs) {
-	 updatedregions[2*iregout] = regions[2*ireg];
-	 updatedregions[2*iregout+1] = regions[2*ireg+1];
-	 ireg++;
-	 iregout++;
-      }
-   }	    
-   void intersection_of_regions(int *regions, int*nregionsR,int *region2s, int*nregion2sR,int *updatedregions) {
-      int sep=1;
-      merge_regions(regions,nregionsR,&sep);
-      merge_regions(region2s,nregion2sR,&sep);
-      int nregs=nregionsR[0];
-      int nreg2s=nregion2sR[0];
-      for(int i=0;i<2*(nregs+nreg2s);i++) updatedregions[i]=-1;
-      if(nregs==0) return;
-      if(nreg2s==0) return;
-      int ireg2 = 0;
-      int iregout = 0;
-      for(int ireg=0; ireg<nregs;ireg++) {
-	 if(ireg2==nreg2s) return;
-	 if(regions[ireg*2+1] < region2s[2*ireg2]) continue;
-	 if(regions[ireg*2] > region2s[2*ireg2+1]) {ireg2++; ireg--; continue;}
-	 
-	 int s = regions[ireg*2];
-	 if(s<region2s[ireg2*2]) s = region2s[ireg2*2];
-	 int e = regions[ireg*2+1];
-	 if(e>region2s[ireg2*2+1]) {
-	    e = region2s[ireg2*2+1];
-	    ireg2++;
-	    ireg--;
-	 }
-	 updatedregions[2*iregout] = s;
-	 updatedregions[2*iregout+1] = e;
-	 iregout++;
-      }
-   }	    
-}
Binary file region_motif_lib/regions.o has changed
--- a/region_motif_lib/regions.r	Wed Aug 06 15:36:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,313 +0,0 @@
-
-# SHOULD ONLY OCCUR IN ONE FILE
-#common.dir = "/Users/jeremyliu1/galaxy-dist/tools/my_tools"
-
-# commonDir from region_motif_intersect.r sourcing file
-dyn.load(paste(commonDir, "/region_motif_lib/regions.so",sep=""))
-
-##reg = matrix(cbind(from,to)) from<to
-##region[[chr]] = reg
-##pos = unique(integer())
-##poslist = list(chr,pos, optional(strand=c(-1,0,+1)))
-
-# USED
-merge.reg <- function(...,sep=1) {
-  ##This function returns union of regs.
-  reg = rbind(...)
-  x=.C("merge_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(sep))
-  reg=matrix(x[[1]][1:(x[[2]]*2)],ncol=2,byrow=TRUE)
-  reg = matrix(reg[which(reg[,2]>reg[,1]),],ncol=2)
-  reg[which(reg==0)]=1
-  return(reg)
-}
-
-merge.regions<-function(...,sep=1) {
-  ##This function returns union of regions.
-    regions=list(...)
-    chrs = unique(unlist(lapply(regions,names),use.names=F))
-    region = list()
-    for(chr in chrs) {
-        region[[chr]] = do.call("merge.reg",c(lapply(regions,function(i) i[[chr]]),sep=sep))
-  }
-  return(region)
-}
-
-plot.reg<-function(reg,xlim=NULL,y=NULL,vertical=FALSE,...) {
-  ##This function does not stack if reg is overlapping.
-  ##new plot is made unless y is specified.
-  if(nrow(reg)==0) return()
-  if(is.null(xlim)) xlim=range(reg)
-  if(is.null(y)) {
-    plot(xlim,c(0,1),type="n",axes=FALSE,xlab=" ",ylab=" ")
-    y=0.5
-  }
-  segments(reg[,1],y,reg[,2],...)
-  if(vertical) abline(v=reg)
-}
-
-distance.to.closest.reg.of.reg <- function(reg,reg2) {
-  ##for each element of reg, what is the closest distance to any element of reg2?
-  reg2 = merge.reg(reg2)
-  reg2 = c(-Inf,t(reg2),Inf)
-  s=reg[,1]
-  e=reg[,2]
-  sbin = as.integer(cut(s,reg2))
-  ebin = as.integer(cut(e,reg2))
-  d = pmin(s-reg2[sbin], reg2[sbin+1]-s, e-reg2[ebin], reg2[ebin+1]-e)
-  d[which(sbin!=ebin | sbin%%2==0)] = 0
-  return(d)
-}
-
-# USED
-distance.to.closest.reg.of.pos <- function(pos,reg) {
-  ##for each element of pos, what is the closest distance to any element of reg?
-  reg = merge.reg(reg)
-  reg = c(-Inf,t(reg),Inf)
-  pbin = as.integer(cut(pos,reg))
-  d = pmin(pos-reg[pbin], reg[pbin+1]-pos)
-  d[which(pbin%%2==0)] = 0
-  return(d)
-}
-
-distance.to.closest.pos.of.reg <- function(reg,pos,pos.strand=NULL,index.return=FALSE) {
-  ##for each element of reg, what is the closest distance to any element of pos?
-  ##if strand is provided, distance is along strand
-  o = order(pos)
-  pos =  c(-Inf,pos[o],Inf)
-  o = c(o[1],o,o[length(o)])
-    
-  s=reg[,1]
-  e=reg[,2]
-  sbin = as.integer(cut(s,pos))
-  ebin = as.integer(cut(e,pos))
-  
-  d=integer(nrow(reg))
-  s.is.closer = s-pos[sbin] < pos[sbin+1]-e
-  if(index.return) {
-    return(ifelse(s.is.closer,o[sbin],o[sbin+1]))
-  }
-  d = ifelse(s.is.closer, s-pos[sbin], e-pos[sbin+1])
-  d[which(sbin!=ebin)] = 0
-  if(!is.null(pos.strand)) {
-    reg.strand = ifelse(s.is.closer,pos.strand[o][sbin],pos.strand[o][sbin+1])
-    d = d * reg.strand
-  }
-  return(d)
-}
-
-if(F) {
-    pos = sample(seq(0,1000,200))
-    pos2 = sample(seq(10,1010,100))
-    pos.strand = sample(c(1,-1),6,replace = T)
-    pos2.strand = sample(c(1,-1),11,replace = T)
-}
-
-distance.to.closest.pos.of.pos <- function(pos,pos2,pos.strand=NULL,pos2.strand=NULL, ignore.pos.strand=TRUE,index.return=FALSE) {
-  ##for each element of pos, what is the closest distance to any element of pos2?
-  ##if index.return==TRUE, index of pos2 closest to pos is returned
-  ##else if strand2 is provided, distance is along strand2
-  ##if strand and strand2 are both provided and !ignore.pos.strand
-  ##  then output is a list giving plus.up, plus.down, minus.up, minus.down
-  ##    plus.up: distance to closest upstream on the same same strand etc. etc. 
-  o = order(pos2)
-  pos2 =  c(-Inf,pos2[o],Inf)
-  if(!is.null(pos2.strand))   pos2.strand = c(-Inf,pos2.strand[o],Inf)
-
-  if(is.null(pos2.strand) | is.null(pos.strand) | ignore.pos.strand) {
-    pbin = as.integer(cut(pos,pos2))
-    
-    pbin = ifelse(pos-pos2[pbin] < pos2[pbin+1]-pos,pbin,pbin+1)
-    d = pos-pos2[pbin]
-    if(!is.null(pos2.strand)) d = d * pos2.strand[pbin]
-    
-    if(index.return) return(o[pbin-1])
-    return(d)
-  }
-  strands = list(plus=1,minus=-1)
-  relcoords = list(up=0,down=1)
-  ind = lapply(strands,function(strand) {
-    ind.p = c(1,which(pos2.strand==strand),length(pos2))
-    pbin.p = cut(pos,pos2[ind.p],labels=FALSE)
-    as.data.frame(lapply(relcoords,function(i) ind.p[pbin.p+i]))
-  })
-  ind.temp = ind
-  ind.minus = which(pos.strand==-1)
-  if(length(ind.minus)>0) {
-      ind[[1]][ind.minus,]=ind.temp[[2]][ind.minus,2:1]
-      ind[[2]][ind.minus,]=ind.temp[[1]][ind.minus,2:1]
-  }
-  ind = unlist(ind,recursive=FALSE)
-  if(index.return) {
-    return( lapply(ind,function(i) {
-      i[which(i==1)]=NA
-      i[which(i==length(pos2))]=NA
-      o[i-1]
-    }) )
-  }
-  return(lapply(ind,function(i) pos.strand*(pos2[i]-pos)))
-}
-
-distance.to.closest.region.of.region <- function(region,region2) {
-  ##for each element of region[[chr]], what is the closest distance to any element of region2[[chr]]?
-  ##returns d[[chr]]
-  chrs = names(region)
-  d=list()
-  for(chr in chrs) {
-    if(is.null(region2[[chr]])) {
-      d[[chr]] = rep(Inf,nrow(region[[chr]]))
-    } else {
-      d[[chr]] = distance.to.closest.reg.of.reg(region[[chr]],region2[[chr]])
-    }
-  }
-  return(d)
-}
-
-# USED
-distance.to.closest.region.of.poslist <- function(poslist,region) {
-  ##for each element of poslist, what is the closest distance to any element of region?
-  chrs = names(table(poslist$chr))
-  d=integer()
-  for(chr in chrs) {
-    ind = which(poslist$chr==chr)
-    pos=poslist$pos[ind]
-    if(is.null(region[[chr]])) {
-      d[ind] = Inf
-    } else {
-      d[ind] = distance.to.closest.reg.of.pos(pos,region[[chr]])
-    }
-  }
-  return(d)
-}
-distance.to.closest.poslist.of.region <- function(region,poslist,index.return=FALSE) {
-  ##for each element of region, what is the closest distance to any element of poslist?
-  chrs = names(region)
-  d=list()
-  for(chr in chrs) {
-    ind = which(poslist$chr==chr)
-    pos=poslist$pos[ind]
-    pos.strand=poslist$strand[ind]
-    d[[chr]] = distance.to.closest.pos.of.reg(region[[chr]],pos,pos.strand,index.return=index.return)
-    if(index.return) d[[chr]] = ind[d[[chr]]]
-  }
-  return(d)
-}
-
-distance.to.closest.poslist.of.poslist <- function(poslist,poslist2,ignore.poslist.strand=TRUE,index.return=FALSE) {
-  ##for each element of poslist, what is the closest distance to any element of poslist2?
-  ##if poslist2$strand is provided, distance is along strand2
-  ##if strand and strand2 are provided and no ignore.poslist.strand
-  ##  then output is a list giving plus.up, plus.down, minus.up, minus.down
-  ##    plus.up: distance to closest upstream on the same same strand etc. etc. 
-  ##if index.return==TRUE, index of pos2 closest to pos is returned
-  
-  chrs = names(table(poslist$chr))
-  
-  d=integer()
-  stranded = !(is.null(poslist2$strand) | is.null(poslist$strand) | ignore.poslist.strand)
-  if(stranded) {
-    brs = c("plus.up","plus.down","minus.up","minus.down")
-    d=list()
-    for(br in brs) d[[br]]=integer()
-  }
-  
-  for(chr in chrs) {
-    ind = which(poslist$chr==chr)
-    ind2 = which(poslist2$chr==chr)
-    pos=poslist$pos[ind]
-    pos2=poslist2$pos[ind2]
-    pos.strand=poslist$strand[ind]
-    pos2.strand=poslist2$strand[ind2]
-    if(!stranded) {
-      d[ind] = distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand,index.return=index.return)
-      if(index.return) d[ind] = ind2[d[ind]]
-    } else {
-      x =  distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand)
-      for(br in brs) {
-        d[[br]][ind] = x[[br]]
-        if(index.return) d[[br]][ind] = ind2[d[[br]][ind]]
-      }
-    }
-  }
-  return(d)
-}
-
-
-reg.minus.reg <- function(reg,reg2) {
-  x = .C("region_minus_region",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]]
-  x=x[which(x>=0)]
-  return(matrix(x,ncol=2,byrow=TRUE))
-}
-
-intersection.of.regs <- function(reg,reg2) {
-  x = .C("intersection_of_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]]
-  x=x[which(x>=0)]
-  return(matrix(x,ncol=2,byrow=TRUE))
-}
-
-region.minus.region<-function(region,region2) {
-  chrs = names(region)
-  for(chr in chrs) {
-    if(is.null(region[[chr]])) next
-    if(!is.null(region2[[chr]])) {
-      region[[chr]] = reg.minus.reg(region[[chr]],region2[[chr]])
-    }
-  }
-  return(region)
-}
-
-intersection.of.regions<-function(region,region2) {
-  chrs = names(region)
-  for(chr in chrs) {
-    if(is.null(region2[[chr]])) {
-      region[[chr]]<-NULL
-    } else {
-      region[[chr]] = intersection.of.regs(region[[chr]],region2[[chr]])
-    }
-  }
-  return(region)
-}
-
-reg.around.pos <-function(pos,range=500,strand=NULL) {
-  if(length(range)==1) range=c(range,range)
-  if(is.null(strand)) strand = 1;
-  reg = cbind(pos-range[1]*strand,pos+range[2]*strand);
-  ind = which(reg[,2]<reg[,1])
-  reg[ind,] =  reg[ind,2:1]
-  ind = which(reg<=0)
-  reg[ind] = 1
-  return(reg)
-}
-
-
-region.around.poslist <-function(poslist,range=500) {
-  chrs = names(table(poslist$chr))
-  region=list()
-  for(chr in chrs) {
-    ind = which(poslist$chr==chr)
-    pos=poslist$pos[ind]
-    strand = 1
-    if(!is.null(poslist$strand)) {
-      strand = poslist$strand[ind]
-    }
-    region[[chr]] =  reg.around.pos(pos,range,strand)
-  }
-  return(region)
-}
-
-
-poslist.of.region.centers <-function(region) {
-  chrs = names(region)
-  n=sapply(region,nrow)
-  return(data.frame(chr=rep(chrs,n),pos=unlist(lapply(region,function(chr)(chr[,1]+chr[,2])/2),use.names = FALSE)))
-}
-
-write.gff.region<-function(region,outfname) {
-  region = lapply(region,function(chr) list(s=chr[,1],e=chr[,2]))
-  out=unlist.chr(region)
-  out$chr=rep(names(region),sapply(region,function(i) length(i$s)))
-  empty=rep(".",length(out$chr))
-  write.table(data.frame(out$chr,empty,empty,out$s,out$e,empty,empty,empty,empty),quote=FALSE,sep="\t",file=outfname,col.names=FALSE,row.names=FALSE)
-}
-
-number.of.regions = function(region)sum(sapply(region,nrow))
-size.of.regions = function(region) sum(sapply(merge.regions(region),function(reg) sum(reg[,2]-reg[,1])))
Binary file region_motif_lib/regions.so has changed