changeset 8:eeea5224f074 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:13:29 -0500
parents c34cac47f875
children 8c78c8985afd
files spp/DESCRIPTION spp/NAMESPACE spp/R/zroutines.R spp/configure spp/configure.ac spp/man/add.broad.peak.regions.Rd spp/man/find.binding.positions.Rd spp/man/get.binding.characteristics.Rd spp/man/get.broad.enrichment.clusters.Rd spp/man/get.conservative.fold.enrichment.profile.Rd spp/man/get.mser.Rd spp/man/get.mser.interpolation.Rd spp/man/get.smoothed.enrichment.mle.Rd spp/man/get.smoothed.tag.density.Rd spp/man/output.binding.results.Rd spp/man/read.bam.tags.Rd spp/man/read.bin.maqmap.tags.Rd spp/man/read.bowtie.tags.Rd spp/man/read.eland.tags.Rd spp/man/read.maqmap.tags.Rd spp/man/read.meland.tags.Rd spp/man/remove.local.tag.anomalies.Rd spp/man/select.informative.tags.Rd spp/man/spp-package.Rd spp/man/write.broadpeak.info.Rd spp/man/write.narrowpeak.binding.Rd spp/man/writewig.Rd spp/src/BGZF.cpp spp/src/BGZF.h spp/src/BamAlignment.cpp spp/src/BamAlignment.h spp/src/BamAux.h spp/src/BamIndex.cpp spp/src/BamIndex.h spp/src/BamMultiReader.cpp spp/src/BamMultiReader.h spp/src/BamReader.cpp spp/src/BamReader.h spp/src/BamReader_p.cpp spp/src/BamReader_p.h spp/src/BamStandardIndex_p.cpp spp/src/BamStandardIndex_p.h spp/src/BamToolsIndex_p.cpp spp/src/BamToolsIndex_p.h spp/src/BamWriter.cpp spp/src/BamWriter.h spp/src/BamWriter_p.cpp spp/src/BamWriter_p.h spp/src/Makevars.in spp/src/api_global.h spp/src/bamread.cpp spp/src/bamtools_global.h spp/src/bed2vector.cpp spp/src/cdensum.c spp/src/const.h spp/src/maqmap.c spp/src/maqmap.h spp/src/maqread.cpp spp/src/pc.h spp/src/peaks.cpp spp/src/wdl.cpp
diffstat 61 files changed, 18612 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/DESCRIPTION	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,12 @@
+Package: spp
+Type: Package
+Title: some description
+Version: 1.0
+Date: 2008-11-10
+Author: Peter K
+Depends: caTools
+Maintainer: peterK<peterk@compbio.med.harvard.edu>
+Description: Describe the package
+License: GPL-2
+LazyLoad: yes
+Packaged: Wed Nov 12 10:42:54 2008; vidhuch
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/NAMESPACE	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,3 @@
+useDynLib(spp)
+
+exportPattern("^[^\\.]")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/R/zroutines.R	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,2501 @@
+#library(caTools)
+#dyn.load("src/bed2vector.so");
+#dyn.load("src/wdl.so");
+#dyn.load("src/peaks.so");
+#dyn.load("src/cdensum.so");
+
+
+# -------- ROUTINES FOR READING IN THE DATA FILES ------------
+# Read tags from an ELAND file through the package's native readers.
+# filename - passed as-is to the native reader
+# read.tag.names - when true, tag names are requested and returned as $names
+# fix.chromosome.names : remove ".fa" from match sequence names
+# max.eland.tag.length - coerced to integer and handed to the native reader
+# extended, multi - switch to the "read_eland_extended" / "read_eland_multi"
+#                   reader; multi takes precedence when both are set
+# Returns list(tags, quality[, names]); each element is a per-sequence list
+# ordered by absolute tag position.
+read.eland.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T,max.eland.tag.length=-1,extended=F,multi=F) {
+  want.names <- if(read.tag.names) { as.integer(1) } else { as.integer(0) };
+  storage.mode(max.eland.tag.length) <- "integer";
+  reader <- "read_eland";
+  if(extended) { reader <- "read_eland_extended"; }
+  if(multi) { reader <- "read_eland_multi"; }
+  per.chr <- .Call(reader,filename,want.names,max.eland.tag.length);
+  # reorder every per-sequence vector by absolute position
+  per.chr <- lapply(per.chr,function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    if(read.tag.names) { rec$s <- rec$s[ord]; }
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  # split the records into parallel tag / quality (/ name) lists
+  result <- list(tags=lapply(per.chr,function(rec) rec$t),
+                 quality=lapply(per.chr,function(rec) rec$n));
+  if(read.tag.names) {
+    result <- c(result,list(names=lapply(per.chr,function(rec) rec$s)));
+  }
+  result
+}
+
+# Read tags through the native "read_tagalign" reader.
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# fix.quality - when true, the quality values of each sequence are replaced by
+#               ranks over their distinct values (largest value -> 0)
+# Returns list(tags, quality); per-sequence lists ordered by absolute position.
+read.tagalign.tags <- function(filename,fix.chromosome.names=T,fix.quality=T) {
+  # Anshul: changed the way the quality field is processed
+  rank.quality <- function(q) {
+    if (min(q)<0.5){
+      q <- ceiling(1000/4^q);
+    }
+    cuts <- unique(sort(c(0,unique(q))));
+    length(cuts)-1-cut(q,breaks=cuts,labels=F)
+  }
+  per.chr <- lapply(.Call("read_tagalign",filename),function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    if(fix.quality) { rec$n <- rank.quality(rec$n); }
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  list(tags=lapply(per.chr,function(rec) rec$t),
+       quality=lapply(per.chr,function(rec) rec$n))
+}
+
+
+# Read tags through the native "read_arachne" reader.
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# Returns list(tags, quality); per-sequence lists ordered by absolute position.
+read.short.arachne.tags <- function(filename,fix.chromosome.names=F) {
+  per.chr <- .Call("read_arachne",filename);
+  per.chr <- lapply(per.chr,function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  list(tags=lapply(per.chr,function(rec) rec$t),
+       quality=lapply(per.chr,function(rec) rec$n))
+}
+
+
+# Read tags through the native "read_arachne_long" reader.
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# Returns list(tags, quality, length); per-sequence lists ordered by absolute
+# tag position.
+read.arachne.tags <- function(filename,fix.chromosome.names=F) {
+  per.chr <- .Call("read_arachne_long",filename);
+  per.chr <- lapply(per.chr,function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    rec$l <- rec$l[ord];
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  list(tags=lapply(per.chr,function(rec) rec$t),
+       quality=lapply(per.chr,function(rec) rec$n),
+       length=lapply(per.chr,function(rec) rec$l))
+}
+
+# Read tags through the native "read_bowtie" reader.
+# read.tag.names - when true, tag names are requested and returned as $names
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# Returns list(tags, quality[, names]); per-sequence lists ordered by absolute
+# tag position.
+read.bowtie.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F) {
+  want.names <- if(read.tag.names) { as.integer(1) } else { as.integer(0) };
+  per.chr <- lapply(.Call("read_bowtie",filename,want.names),function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    if(read.tag.names) { rec$s <- rec$s[ord]; }
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  result <- list(tags=lapply(per.chr,function(rec) rec$t),
+                 quality=lapply(per.chr,function(rec) rec$n));
+  if(read.tag.names) {
+    result <- c(result,list(names=lapply(per.chr,function(rec) rec$s)));
+  }
+  result
+}
+
+# Read tags through the native "read_bam" reader.
+# read.tag.names - when true, tag names are requested and returned as $names
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# Returns list(tags, quality[, names]); per-sequence lists ordered by absolute
+# tag position.
+read.bam.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F) {
+  want.names <- if(read.tag.names) { as.integer(1) } else { as.integer(0) };
+  per.chr <- lapply(.Call("read_bam",filename,want.names),function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    if(read.tag.names) { rec$s <- rec$s[ord]; }
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  result <- list(tags=lapply(per.chr,function(rec) rec$t),
+                 quality=lapply(per.chr,function(rec) rec$n));
+  if(read.tag.names) {
+    result <- c(result,list(names=lapply(per.chr,function(rec) rec$s)));
+  }
+  result
+}
+
+
+# Read tags through the native "read_helicostabf" reader.
+# read.tag.names - when true, tag names are requested and returned as $names
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# include.length.info - NOTE(review): not referenced in the body; $length is
+#                       always returned
+# Returns list(tags, quality, length[, names]); per-sequence lists ordered by
+# absolute tag position.
+read.helicos.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F,include.length.info=T) {
+  want.names <- if(read.tag.names) { as.integer(1) } else { as.integer(0) };
+  per.chr <- lapply(.Call("read_helicostabf",filename,want.names),function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    rec$l <- rec$l[ord];
+    if(read.tag.names) { rec$s <- rec$s[ord]; }
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  result <- list(tags=lapply(per.chr,function(rec) rec$t),
+                 quality=lapply(per.chr,function(rec) rec$n),
+                 length=lapply(per.chr,function(rec) rec$l));
+  if(read.tag.names) {
+    result <- c(result,list(names=lapply(per.chr,function(rec) rec$s)));
+  }
+  result
+}
+
+# Read tags through the native "read_maqmap" reader.
+# read.tag.names - when true, tag names are requested and returned as $names
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# Returns list(tags, quality[, names]); per-sequence lists ordered by absolute
+# tag position.
+read.maqmap.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) {
+  want.names <- if(read.tag.names) { as.integer(1) } else { as.integer(0) };
+  per.chr <- lapply(.Call("read_maqmap",filename,want.names),function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    if(read.tag.names) { rec$s <- rec$s[ord]; }
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  result <- list(tags=lapply(per.chr,function(rec) rec$t),
+                 quality=lapply(per.chr,function(rec) rec$n));
+  if(read.tag.names) {
+    result <- c(result,list(names=lapply(per.chr,function(rec) rec$s)));
+  }
+  result
+}
+
+
+# Read tags through the native "read_binmaqmap" reader.
+# read.tag.names - when true, tag names are requested and returned as $names
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# Returns list(tags, quality[, names]); per-sequence lists ordered by absolute
+# tag position.
+read.bin.maqmap.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) {
+  want.names <- if(read.tag.names) { as.integer(1) } else { as.integer(0) };
+  per.chr <- lapply(.Call("read_binmaqmap",filename,want.names),function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    if(read.tag.names) { rec$s <- rec$s[ord]; }
+    rec
+  });
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  result <- list(tags=lapply(per.chr,function(rec) rec$t),
+                 quality=lapply(per.chr,function(rec) rec$n));
+  if(read.tag.names) {
+    result <- c(result,list(names=lapply(per.chr,function(rec) rec$s)));
+  }
+  result
+}
+
+
+# read in tags from an extended eland format with match length information
+# (native "read_meland" reader).
+# read.tag.names - when true, tag names are requested and returned as $names
+# fix.chromosome.names - when true, ".fa" is removed from the sequence names
+# Returns list(tags, quality[, names]). The quality of a tag is
+# (longest length over all sequences - its own length) + n/10, so smaller
+# values correspond to longer matches.
+read.meland.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) {
+  want.names <- if(read.tag.names) { as.integer(1) } else { as.integer(0) };
+  per.chr <- lapply(.Call("read_meland",filename,want.names),function(rec) {
+    ord <- order(abs(rec$t));
+    rec$t <- rec$t[ord];
+    rec$n <- rec$n[ord];
+    rec$l <- rec$l[ord];
+    if(read.tag.names) { rec$s <- rec$s[ord]; }
+    rec
+  });
+
+  if(fix.chromosome.names) {
+    names(per.chr) <- gsub("\\.fa","",names(per.chr));
+  }
+  chr.names <- names(per.chr);
+  names(chr.names) <- chr.names;
+  # reformulate quality scores into monotonic values
+  longest <- max(unlist(lapply(per.chr,function(rec) max(rec$l))));
+  qual <- lapply(chr.names,function(chr) {
+    rec <- per.chr[[chr]];
+    (longest-rec$l)+rec$n/10
+  });
+  result <- list(tags=lapply(per.chr,function(rec) rec$t),quality=qual);
+  if(read.tag.names) {
+    result <- c(result,list(names=lapply(per.chr,function(rec) rec$s)));
+  }
+  result
+}
+
+# -------- ROUTINES FOR ASSESSING BINDING PATTERN AND SELECTING INFORMATIVE TAGS  ------------
+
+# removes tag positions that have anomalously high counts on both strands
+# data - list with a per-chromosome $tags list and, optionally, parallel
+#        $quality, $names and $length lists
+# bin - bin size used when counting tags per position
+# z - z-score used to determine anomalous bins
+# zo - z used to filter out one-strand matches
+# trim.fraction - fraction of top bins to discard when calculating overall background density
+# Returns data with the anomalous tags removed from $tags and the matching
+# entries removed from the parallel lists; chromosomes left without tags are dropped.
+remove.tag.anomalies <- function(data, bin=1,trim.fraction=1e-3,z=5,zo=3*z) {
+  
+  # per-chromosome worker: returns the retained tags, or (return.indecies=T)
+  # a logical keep-mask over tv
+  t.remove.tag.anomalies <- function(tv,bin=1,trim.fraction=1e-3,z=5,zo=3*z,return.indecies=F) {
+    tt <- table(floor(tv/bin));
+
+    # trim value
+    stt <- sort(as.numeric(tt));
+    stt <- stt[1:(length(stt)*(1-trim.fraction))];
+    mtc <- mean(stt); tcd <- sqrt(var(stt));
+
+    # count thresholds for two-strand (thr) and one-strand (thr.o) anomalies
+    thr <- max(1,ceiling(mtc+z*tcd));
+    thr.o <- max(1,ceiling(mtc+zo*tcd));
+    # filter tt
+    tt <- tt[tt>=thr]
+    # get + and - tags
+    tp <- as.numeric(names(tt));
+    pti <- tp>0;
+    it <- intersect(tp[pti],(-1)*tp[!pti]);
+    # add one-strand matches
+    it <- unique(c(it,tp[tt>=thr.o]));
+    sit <- c(it,(-1)*it);
+    
+    if(bin>1) {
+      # expand the flagged bins back into individual positions
+      sit <- sit*bin;
+      sit <- c(sit,unlist(lapply(1:bin,function(i) sit+i)))
+    }
+    if(return.indecies) {
+      return(!tv %in% sit);
+    } else {
+      return(tv[!tv %in% sit]);
+    }
+  }
+
+  vil <- lapply(data$tags,t.remove.tag.anomalies,return.indecies=T,bin=bin,trim.fraction=trim.fraction,z=z,zo=zo);
+  chrl <- names(data$tags); names(chrl) <- chrl;
+  data$tags <- lapply(chrl,function(chr) data$tags[[chr]][vil[[chr]]]);
+  # count tags to remove empty chromosomes
+  nt <- unlist(lapply(data$tags,length));
+  if(any(nt==0)) {
+    data$tags <- data$tags[nt!=0]
+  }
+  
+  # apply the same keep-masks to every per-tag list that accompanies $tags
+  if(!is.null(data$quality)) {
+    data$quality <- lapply(chrl,function(chr) data$quality[[chr]][vil[[chr]]]);
+    data$quality <- data$quality[nt!=0];
+  }
+  if(!is.null(data$names)) {
+    data$names <- lapply(chrl,function(chr) data$names[[chr]][vil[[chr]]]);
+    data$names <- data$names[nt!=0];
+  }
+  # $length (returned by the arachne/helicos readers) has to stay aligned with $tags too
+  if(is.list(data$length)) {
+    data$length <- lapply(chrl,function(chr) data$length[[chr]][vil[[chr]]]);
+    data$length <- data$length[nt!=0];
+  }
+  
+  return(data);
+}
+
+# caps or removes tag positions that are significantly higher than local background
+# tags - per-chromosome list of tag vectors
+# window.size, eliminate.fold, cap.fold, z.threshold - forwarded unchanged to
+#   filter.singular.positions.by.local.density for every chromosome
+# Returns the per-chromosome list of filtered results.
+remove.local.tag.anomalies <- function(tags,window.size=200,eliminate.fold=10,cap.fold=4,z.threshold=3) {
+  # forward the caller's settings (previously fixed constants were passed here)
+  lapply(tags,filter.singular.positions.by.local.density,window.size=window.size,eliminate.fold=eliminate.fold,cap.fold=cap.fold,z.threshold=z.threshold);
+}
+
+
+
+# assess strand cross-correlation, determine peak position, determine appropriate window size
+# for binding detection.
+# data - list with per-chromosome $tags and (optionally) $quality lists
+# srange, bin - shift range and bin size handed to tag.scc
+# cluster - optional cluster object; when given, per-chromosome work goes through clusterApplyLB
+# min.tag.count - quality bins contributing fewer tags than this are not evaluated and are marked non-informative
+# acceptance.z.score - z-score converted (via pnorm) into the p-value cutoff of the bin acceptance t-test
+# remove.tag.anomalies, anomalies.z - whether to run remove.tag.anomalies() first, and the z passed to it
+# accept.all.tags - use all tags and skip the quality-bin acceptance analysis
+# debug - not referenced in this function body
+# Returns list(cross.correlation, peak, whs), plus quality.bin.acceptance when the
+# quality bins are evaluated.
+get.binding.characteristics <- function(data,srange=c(50,500),bin=5,cluster=NULL,debug=F,min.tag.count=1e3,acceptance.z.score=3,remove.tag.anomalies=T,anomalies.z=5,accept.all.tags=F) {
+  if(remove.tag.anomalies) {
+    # the logical argument shadows the function of the same name; the call still
+    # resolves because R skips non-function bindings when looking up a call target
+    data <- remove.tag.anomalies(data,z=anomalies.z);
+  }
+  
+  # take highest quality tag bin
+  if(!is.null(data$quality) & !accept.all.tags) {
+    min.bin <- min(unlist(lapply(data$quality,min)))
+    chrl <- names(data$tags); names(chrl) <- chrl;
+    otl <- lapply(chrl,function(chr) data$tags[[chr]][data$quality[[chr]]==min.bin]);
+  } else {
+    otl <- data$tags;
+  }
+  # remove empty chromosomes
+  otl <- otl[unlist(lapply(otl,length))!=0];
+
+
+  # calculate strand scc
+  if(!is.null(cluster)) {
+    cc <- clusterApplyLB(cluster,otl,tag.scc,srange=srange,bin=bin);
+    names(cc) <- names(otl); 
+  } else {
+    cc <- lapply(otl,tag.scc,srange=srange,bin=bin);
+  }
+  # combine the per-chromosome profiles through t.plotavcc (plot=F, return.ac=T)
+  ccl<-list(sample=cc);
+  ccl.av <- lapply(names(ccl),t.plotavcc,type='l',ccl=ccl,return.ac=T,ttl=list(sample=otl),plot=F)[[1]]
+  ccl.av <- data.frame(x=as.numeric(names(ccl.av)),y=as.numeric(ccl.av));
+  
+  # find peak
+  pi <- which.max(ccl.av$y);
+  
+  # determine width at third-height
+  th <- (ccl.av$y[pi]-ccl.av$y[length(ccl.av$y)])/3+ccl.av$y[length(ccl.av$y)]
+  whs <- max(ccl.av$x[ccl.av$y>=th]);
+  
+  # NOTE(review): ccl.av$x is built with as.numeric(), so is.integer(whs) is FALSE
+  # and this branch always replaces the third-height value computed above; a
+  # finiteness test looks like what was intended - confirm before changing, since
+  # results depend on it
+  if (! is.integer(whs)) { # Anshul: added this to avoid situations where whs ends up being -Inf
+  	whs <- ccl.av$x[ min(c(2*pi,length(ccl.av$y))) ]
+  }
+
+  # determine acceptance of different quality bins
+  
+  # calculates tag scc for the best tags, and combinations of best tag category with every other category
+  # for subsequent selection of acceptable categories
+  # (uses data, otl, ccl.av, min.bin, bin and cluster from the enclosing function)
+  scc.acceptance.calc <- function() {
+
+    qr <- range(unlist(lapply(data$quality,range)))
+
+    # start with best tags
+
+    # determine half-width for scc calculations
+    pi <- which.max(ccl.av$y);
+
+    # determine width at half-height
+    th <- (ccl.av$y[pi]-ccl.av$y[length(ccl.av$y)])/2+ccl.av$y[length(ccl.av$y)]
+    lwhs <- max(ccl.av$x[ccl.av$y>=th])-ccl.av$x[pi];
+    lwhs <- max(c(20,bin*10,lwhs));
+    # local shift range centred on the peak (shadows the outer srange argument)
+    srange <- ccl.av$x[pi]+c(-lwhs,lwhs)
+
+    # calculate chromosome-average scc
+    t.scc <- function(tags) {
+      if(is.null(cluster)) {
+        cc <- lapply(tags,tag.scc,srange=srange,bin=bin);
+      } else {
+        cc <- clusterApplyLB(cluster,tags,tag.scc,srange=srange,bin=bin); names(cc) <- names(tags);
+      }
+      return(t.plotavcc(1,type='l',ccl=list(cc),ttl=list(tags),plot=F,return.ac=T))
+    }
+
+
+    # returns the average scc for the best-quality tags combined with the tags of
+    # quality bin qual (NULL when that bin has fewer than min.tag.count tags)
+    t.cat <- function(qual) {
+      # construct tag set
+      if(qual==qr[1]) {
+        ts <- otl;
+      } else {
+        nts <- names(otl); names(nts) <- nts;
+        # select tags
+        at <- lapply(nts,function(chr) data$tags[[chr]][data$quality[[chr]]==qual]);
+        ntags <- sum(unlist(lapply(at,length)));
+        if(ntags<min.tag.count) { return(NULL); }
+
+        # append to otl
+        ts <- lapply(nts,function(nam) c(otl[[nam]],at[[nam]]));
+      }
+
+      return(t.scc(ts));
+    }
+
+
+    # calculate cross-correlation values for each quality bin
+    ql <- sort(unique(unlist(lapply(data$quality,unique)))); names(ql) <- ql;
+
+    qccl <- lapply(ql,t.cat);
+
+    # acceptance tests
+    # the first (best) bin is always accepted; every other bin is accepted when a
+    # one-sided t-test finds its profile greater than the best-bin profile
+    ac <- c(T,unlist(lapply(qccl[-1],function(d) if(is.null(d)) { return(F) } else { t.test(d-qccl[[as.character(min.bin)]],alternative="greater")$p.value<pnorm(acceptance.z.score,lower.tail=F) }))); names(ac) <- names(qccl);
+    return(list(informative.bins=ac,quality.cc=qccl))
+  }
+
+  if(accept.all.tags | is.null(data$quality)) {
+    return(list(cross.correlation=ccl.av,peak=list(x=ccl.av$x[pi],y=ccl.av$y[pi]),whs=whs))    
+  } else {
+    acc <- scc.acceptance.calc();
+    return(list(cross.correlation=ccl.av,peak=list(x=ccl.av$x[pi],y=ccl.av$y[pi]),whs=whs,quality.bin.acceptance=acc));
+  }
+
+}
+
+
+# select a set of informative tags based on the pre-calculated binding characteristics
+# data - list with per-chromosome $tags and $quality lists
+# binding.characteristics - list carrying $quality.bin.acceptance$informative.bins,
+#                           a logical vector named by quality bin
+# Returns the per-chromosome tag list restricted to tags whose quality bin is
+# flagged informative; all tags are returned when no selection info is available.
+select.informative.tags <- function(data,binding.characteristics=NULL) {
+  if(is.null(binding.characteristics)) {
+    return(data$tags);
+  }
+  acceptance <- binding.characteristics$quality.bin.acceptance;
+  if(is.null(acceptance)) {
+    cat("binding characteristics doesn't contain quality selection info, accepting all tags\n");
+    return(data$tags);
+  }
+
+  # names of the quality bins that were accepted
+  flags <- acceptance$informative.bins;
+  accepted.bins <- names(flags)[flags]
+
+  chr.names <- names(data$tags);
+  names(chr.names) <- chr.names;
+  lapply(chr.names,function(chr) {
+    in.accepted <- as.character(data$quality[[chr]]) %in% accepted.bins;
+    data$tags[[chr]][in.accepted]
+  })
+}
+
+# -------- ROUTINES FOR CALLING BINDING POSITIONS  ------------
+
+# determine binding positions
+# signal.data - IP tag lists
+# control.data - input tag lists
+# f - fraction of signal tags kept per chromosome when f<1 (random subsampling)
+# e.value - desired E-value threshold (either E-value or FDR threshold must be provided)
+# fdr - desired FDR threshold
+# min.dist - minimal distance between detected positions
+# tag.count.whs - size of the window to be used to estimate confidence interval of the peak fold enrichment ratios
+# enrichment.z - Z-score defining the desired confidence level for enrichment interval estimates
+# enrichment.background.scales - define how many times larger should be the window for estimating background
+#                                tag density when evaluating peak enrichment confidence intervals.
+#                                If multiple values are given, multiple independent interval estimates will be
+#                                calculated.
+# tec.filter - whether to mask out the regions that exhibit significant background enrichment
+# tec.window.size, tec.z - window size and Z-score for masking out significant background enrichment regions
+# mle.filter, min.mle.threshold - when mle.filter is set, keep only positions whose "enr.mle" columns all
+#                                exceed min.mle.threshold (see the conditions noted in the body)
+# tec, n.control.samples - not referenced in this function body
+#
+# If the control.data is not provided, the method will assess significance of the determined binding positions
+# based on the randomizations of the original data. The following parameters control such randomizations:
+# n.randomizations - number of randomizations to be performed
+# shuffle.window - size of the bin that defines the tags that are kept together during randomization.
+#                  value of 0 means that all tags are shuffled independently
+#
+# Binding detection methods: 
+# tag.wtd - default method.
+#           must specify parameter "whs", which is the half-size of the window used to calculate binding scores
+# tag.lwcc - LWCC method;
+#           must specify whs - a size of the window used to calculate binding scores
+#           can specify isize (default=15bp) - size of the internal window that is masked out
+#
+# Returns the lwcc.prediction result with per-position tag counts ($npl[[chr]]$nt), $f, $n,
+# $n.bg (when control.data is given), enrichment estimates and $whs added.
+find.binding.positions <- function(signal.data,f=1,e.value=NULL,fdr=NULL, masked.data=NULL,control.data=NULL,whs=200,min.dist=200,window.size=4e7,cluster=NULL,debug=T,n.randomizations=3,shuffle.window=1,min.thr=2,topN=NULL, tag.count.whs=100, enrichment.z=2, method=tag.wtd, tec.filter=T,tec.window.size=1e4,tec.z=5,tec.masking.window.size=tec.window.size, tec.poisson.z=5,tec.poisson.ratio=5, tec=NULL, n.control.samples=1, enrichment.scale.down.control=F, enrichment.background.scales=c(1,5,10), use.randomized.controls=F, background.density.scaling=T, mle.filter=F, min.mle.threshold=1, ...) {
+
+  if(f<1) {
+    if(debug) { cat("subsampling signal ... "); }
+    signal.data <- lapply(signal.data,function(x) sample(x,length(x)*f))
+    if(debug) {  cat("done\n"); }
+  }
+
+
+  if(!is.null(control.data) & !use.randomized.controls) {
+    # limit both control and signal data to a common set of chromosomes
+    chrl <- intersect(names(signal.data),names(control.data));
+    signal.data <- signal.data[chrl];
+    control.data <- control.data[chrl];
+    control <- list(control.data);
+  } else {
+    control <- NULL;
+  }
+  
+  # control.data is passed as bg.tl in either case; `control` is only non-NULL
+  # when a control is given and randomized controls are not requested
+  prd <- lwcc.prediction(signal.data,min.dist=min.dist,whs=whs,window.size=window.size,e.value=e.value,fdr=fdr,debug=debug,n.randomizations=n.randomizations,shuffle.window=shuffle.window,min.thr=min.thr,cluster=cluster,method=method,bg.tl=control.data,mask.tl=masked.data, topN=topN, control=control,tec.filter=tec.filter,tec.z=tec.z,tec.window.size=tec.window.size, tec.masking.window.size=tec.masking.window.size, tec.poisson.z=tec.poisson.z,tec.poisson.ratio=tec.poisson.ratio, background.density.scaling=background.density.scaling, ...);
+
+  # add tag counts
+  chrl <- names(prd$npl); names(chrl) <- chrl;
+  prd$npl <- lapply(chrl,function(chr) {
+    pd <- prd$npl[[chr]];
+    pd$nt <- points.within(abs(signal.data[[chr]]),pd$x-tag.count.whs,pd$x+tag.count.whs,return.point.counts=T);
+    return(pd);
+  });
+  prd$f <- f;
+  prd$n <- sum(unlist(lapply(signal.data,length)));
+  if(!is.null(control.data)) {
+    prd$n.bg <- sum(unlist(lapply(control.data,length)));
+  }
+  
+  # calculate enrichment ratios
+  prd <- calculate.enrichment.estimates(prd,signal.data,control.data=control.data,fraction=1,tag.count.whs=tag.count.whs,z=enrichment.z,scale.down.control=enrichment.scale.down.control,background.scales=enrichment.background.scales);
+
+  # NOTE(review): the filter below only runs when there is more than one chromosome
+  # and more than one "enr.mle" column; otherwise positions are left unfiltered -
+  # confirm that this is intended
+  if(mle.filter) {
+    if(!is.null(prd$npl)) {
+      if(length(prd$npl)>1) {
+        mle.columns <- grep("enr.mle",colnames(prd$npl[[1]]));
+        if(length(mle.columns)>1) {
+          prd$npl <- lapply(prd$npl,function(d) d[apply(d[,mle.columns],1,function(x) all(x>min.mle.threshold)),])
+        }
+      }
+    }
+  }
+
+  prd$whs <- whs;
+
+  return(prd);
+}
+
+
+
+# -------- ROUTINES FOR WRITING OUT TAG DENSITY AND ENRICHMENT PROFILES  ------------
+# calculate smoothed tag density, optionally subtracting the background
+# signal.tags - per-chromosome list of signal tag vectors
+# control.tags - optional per-chromosome control tags; their density, scaled by
+#                bg.weight, is subtracted from the signal density
+# bandwidth, step - passed to densum as bw and step
+# bg.weight - weight of the control density; when NULL it is estimated with
+#             dataset.density.ratio (a value supplied by the caller is kept)
+# tag.shift - added to every tag before the absolute value is taken
+# background.density.scaling - forwarded to dataset.density.ratio / dataset.density.size
+# rngl - optional per-chromosome c(from,to) ranges; range of the shifted tags otherwise
+# scale.by.dataset.size - multiply the density by dataset.density.size(...)/1e6
+# Returns a per-chromosome list of data frames with columns x and y.
+get.smoothed.tag.density <- function(signal.tags,control.tags=NULL,bandwidth=150,bg.weight=NULL,tag.shift=146/2,step=round(bandwidth/3),background.density.scaling=T,rngl=NULL,scale.by.dataset.size=F) {
+  chrl <- names(signal.tags); names(chrl) <- chrl;
+
+  # estimate the control weight only when the caller has not provided one
+  if(!is.null(control.tags) && is.null(bg.weight)) {
+    bg.weight <- dataset.density.ratio(signal.tags,control.tags,background.density.scaling=background.density.scaling);
+  }
+
+  if(scale.by.dataset.size) {
+    den.scaling <- dataset.density.size(signal.tags,background.density.scaling=background.density.scaling)/1e6;
+  } else {
+    den.scaling <- 1;
+  }
+  
+  lapply(chrl,function(chr) {
+    ad <- abs(signal.tags[[chr]]+tag.shift);
+    rng <- NULL;
+    if(!is.null(rngl)) {
+      rng <- rngl[[chr]];
+    }
+    if(is.null(rng)) {
+      rng <- range(ad);
+    }
+
+    ds <- densum(ad,bw=bandwidth,from=rng[1],to=rng[2],return.x=T,step=step);
+    if(!is.null(control.tags)) {
+      if(!is.null(control.tags[[chr]])) {
+        bsd <- densum(abs(control.tags[[chr]]+tag.shift),bw=bandwidth,from=rng[1],to=rng[2],return.x=F,step=step);
+        ds$y <- ds$y-bsd*bg.weight;
+      }
+    }
+    # ds$x[1] and ds$x[2] are used as the first and last grid positions
+    return(data.frame(x=seq(ds$x[1],ds$x[2],by=step),y=den.scaling*ds$y))
+  })
+}
+
+# get smoothed maximum likelihood estimate of the log2 signal to control enrichment ratio
+# signal.tags, control.tags - per-chromosome tag lists; only chromosomes present in both are returned
+# tag.shift - shift used for the common range and for both density calculations
+# background.density.scaling - forwarded to dataset.density.ratio when bg.weight is NULL
+# pseudocount - added to both densities before taking log2
+# bg.weight - control weight; estimated with dataset.density.ratio when NULL
+# ... - forwarded to get.smoothed.tag.density
+# Returns (invisibly) a per-chromosome list of data frames whose y column is
+# log2(signal+pseudocount) - log2(control+pseudocount) - log2(bg.weight).
+get.smoothed.enrichment.mle <- function(signal.tags, control.tags, tag.shift=146/2, background.density.scaling=F, pseudocount=1,bg.weight=NULL,  ... ) {
+  # determine common range
+  chrl <- intersect(names(signal.tags),names(control.tags)); names(chrl) <- chrl;
+  rngl <- lapply(chrl,function(chr) range(c(range(abs(signal.tags[[chr]]+tag.shift)),range(abs(control.tags[[chr]]+tag.shift)))))
+  # tag.shift is passed on so that the densities use the same shift as the ranges
+  ssd <- get.smoothed.tag.density(signal.tags, rngl=rngl, tag.shift=tag.shift, ..., scale.by.dataset.size=F)
+  csd <- get.smoothed.tag.density(control.tags, rngl=rngl, tag.shift=tag.shift, ..., scale.by.dataset.size=F)
+  if(is.null(bg.weight)) {
+    bg.weight <- dataset.density.ratio(signal.tags,control.tags,background.density.scaling=background.density.scaling);
+  }
+  cmle <- lapply(chrl,function(chr) { d <- ssd[[chr]]; d$y <- log2(d$y+pseudocount) - log2(csd[[chr]]$y+pseudocount) - log2(bg.weight); return(d); })
+  # the result has always been returned invisibly; kept that way
+  invisible(cmle)
+}
+
+
+# returns a conservative upper/lower bound profile (log2) given signal tag list, background tag list and window scales
+# ftl, btl - per-chromosome signal and background tag lists
+# fws, bwsl, step, alpha, use.most.informative.scale, quick.calculation -
+#   forwarded to mbs.enrichment.bounds
+# tag.shift - added to every tag before the absolute value is taken
+# bg.weight - background weight; estimated with dataset.density.ratio when NULL
+# posl - optional per-chromosome positions at which to evaluate (passed as pos=);
+#        chromosomes missing from posl are skipped
+# return.mle - also return log2 of the mle, lb and ub values
+# Returns a per-chromosome list of data frames (x, y[, mle, lb, ub]); y is the
+# log2 lower bound where it exceeds 1, the log2 upper bound where it is below 1,
+# and 0 elsewhere.
+get.conservative.fold.enrichment.profile <- function(ftl,btl,fws,bwsl=c(1,5,25,50)*fws,step=50,tag.shift=146/2,alpha=0.05,use.most.informative.scale=F,quick.calculation=T,background.density.scaling=T,bg.weight=NULL,posl=NULL,return.mle=F) {
+  # include only chromosomes with more than 2 reads
+  n.reads <- unlist(lapply(ftl,length));
+  ftl <- ftl[n.reads>2]
+  chrl <- names(ftl); names(chrl) <- chrl;
+  if(!is.null(posl)) {
+    chrl <- chrl[chrl %in% names(posl)];
+  }
+  # calculate background tag ratio
+  if(is.null(bg.weight)) {
+    bg.weight <- dataset.density.ratio(ftl,btl,background.density.scaling=background.density.scaling);
+  }
+  lapply(chrl,function(chr) {
+    bg.pos <- if(is.null(btl[[chr]])) { c() } else { abs(btl[[chr]]+tag.shift) };
+    signal.pos <- abs(ftl[[chr]]+tag.shift);
+    if(is.null(posl)) {
+      # regular grid: x positions are rebuilt from the returned start/end/step
+      eb <- mbs.enrichment.bounds(signal.pos,bg.pos,fws=fws,bwsl=bwsl,step=step,calculate.upper.bound=T,bg.weight=bg.weight,use.most.informative.scale=use.most.informative.scale,quick.calculation=quick.calculation,alpha=alpha);
+      xs <- seq(eb$x$s,eb$x$e,by=eb$x$step);
+    } else {
+      # caller-specified positions
+      eb <- mbs.enrichment.bounds(signal.pos,bg.pos,fws=fws,bwsl=bwsl,step=step,calculate.upper.bound=T,bg.weight=bg.weight,use.most.informative.scale=use.most.informative.scale,quick.calculation=quick.calculation,alpha=alpha,pos=posl[[chr]]);
+      xs <- posl[[chr]];
+    }
+    # compose profile showing lower bound for enriched, upper bound for depleted regions
+    profile <- rep(1,length(eb$mle));
+    enriched <- which(!is.na(eb$lb) & eb$lb>1);
+    profile[enriched] <- eb$lb[enriched];
+    depleted <- which(!is.na(eb$ub) & eb$ub<1);
+    profile[depleted] <- eb$ub[depleted];
+    profile <- log2(profile);
+    if(return.mle) {
+      data.frame(x=xs,y=profile,mle=log2(eb$mle),lb=log2(eb$lb),ub=log2(eb$ub))
+    } else {
+      data.frame(x=xs,y=profile)
+    }
+  })
+}
+
+
+# write a per-chromosome $x/$y data structure into a wig file
+# dat - per-chromosome list of structures with $x and $y
+# fname - output file name
+# feature - passed to write.probe.wig
+# threshold - not referenced in this function body
+# zip - when true, compress the file with the external "zip" command and remove
+#       the uncompressed file once compression has succeeded
+# Returns the name of the file that was left on disk: fname, or fname.zip after
+# a successful compression.
+writewig <- function(dat,fname,feature,threshold=5,zip=F) {
+  chrl <- names(dat); names(chrl) <- chrl;
+  invisible(lapply(chrl,function(chr) {
+    bdiff <- dat[[chr]];
+    # positions with a non-missing y value
+    ind <- which(!is.na(bdiff$y[seq_along(bdiff$x)]));
+    # the first chromosome starts the file and writes the header, the rest append
+    header <- chr==chrl[1];
+    write.probe.wig(chr,bdiff$x[ind],bdiff$y[ind],fname,append=!header,feature=feature,header=header);
+  }))
+  if(zip) {
+    zf <- paste(fname,"zip",sep=".");
+    # shQuote keeps file names with quotes or other shell characters intact
+    status <- system(paste("zip",shQuote(zf),shQuote(fname)));
+    if(status!=0) {
+      # never delete the only copy of the data when compression did not work
+      warning("zip failed for ",fname,"; keeping the uncompressed file");
+      return(fname);
+    }
+    file.remove(fname);
+    return(zf);
+  } else {
+    return(fname);
+  }
+}
+
+
+
+# -------- ROUTINES FOR ANALYZING SATURATION PROPERTIES  ------------
+
+# PUBLIC
+# calculate minimal saturation enrichment ratios (MSER) 
+# signal.data, control.data - tag lists handed to get.subsample.chain.calls
+# n.chains, n.steps, step.size - number of subsampling chains and their step settings
+# chains - precomputed chains; when given, no subsampling is performed
+# cluster - optional cluster object; chains are then computed with clusterApplyLB
+# test.agreement, enrichment.background.scales - passed to mser.chain.interpolation
+# return.chains - return list(mser, chains) instead of just the MSER values
+# ... - forwarded to get.subsample.chain.calls
+get.mser <- function(signal.data,control.data,n.chains=5,step.size=1e5, chains=NULL, cluster=NULL, test.agreement=0.99, return.chains=F, enrichment.background.scales=c(1), n.steps=1, ...) {
+  if(is.null(chains)) {
+    chain.ids <- c(1:n.chains);
+    names(chain.ids) <- chain.ids;
+    if(!is.null(cluster)) {
+      chains <- clusterApplyLB(cluster,chain.ids,get.subsample.chain.calls,signal.data=signal.data,control.data=control.data,n.steps=n.steps,step.size=step.size,subsample.control=F, enrichment.background.scales=enrichment.background.scales, ...);
+      names(chains) <- chain.ids;
+    } else {
+      chains <- lapply(chain.ids,get.subsample.chain.calls,signal.data=signal.data,control.data=control.data,n.steps=n.steps,step.size=step.size,subsample.control=F, enrichment.background.scales=enrichment.background.scales, ...);
+    }
+  }
+  cvl <- mser.chain.interpolation(chains=chains,enrichment.background.scales=enrichment.background.scales,test.agreement=test.agreement,return.lists=F);
+  # with a single step only the $me value of every entry is reported
+  msers <- if(n.steps>1) { cvl } else { unlist(lapply(cvl,function(entry) entry$me)) };
+  if(!return.chains) {
+    return(msers);
+  }
+  list(mser=msers,chains=chains)
+}
+
+# PUBLIC 
+# interpolate MSER dependency on tag counts
+# signal.data, control.data - tag lists
+# target.fold.enrichment - MSER value for which the required tag count is predicted
+# n.chains, n.steps, step.size, chains, excluded.steps, ... - passed to get.mser
+# test.agreement, enrichment.background.scales - passed to get.mser and mser.chain.interpolation
+# return.chains - also return the subsampling chains
+# Returns a list with one entry per mser.chain.interpolation result, each
+# holding $prediction (NA when any MSER equals 1) and the $log10.fit linear
+# model; wrapped as list(interpolation, chains) when return.chains is set.
+get.mser.interpolation <- function(signal.data,control.data,target.fold.enrichment=5,n.chains=10,n.steps=6,step.size=1e5, chains=NULL,  test.agreement=0.99, return.chains=F, enrichment.background.scales=c(1), excluded.steps=c(seq(2,n.steps-2)), ...) {
+  # test.agreement has to be spelled exactly: a misspelled name is not matched to
+  # get.mser's argument and leaks into its "..." instead
+  msers <- get.mser(signal.data,control.data,n.chains=n.chains,n.steps=n.steps,step.size=step.size,chains=chains,test.agreement=test.agreement,return.chains=T,enrichment.background.scales=enrichment.background.scales,excluded.steps=excluded.steps, ...);
+
+  # adjust sizes in case a subset of chromosomes was used
+  mser <- mser.chain.interpolation(chains=msers$chains,enrichment.background.scales=enrichment.background.scales,test.agreement=test.agreement,return.lists=T);
+  sr <- sum(unlist(lapply(signal.data,length)))/mser[[1]][[1]]$n[1];
+
+  # Subsampling each chain requires removing a fraction of each chromosome's
+  # tag list.  To get the exact step.size, this often leaves chromosomes with
+  # a non-integer number of tags.  The non-integer values are floored, so each
+  # chr can contribute at most 0.999.. <= 1 error to the step.size.
+  floor.error <- length(msers$chains[[1]][[1]]$npl)
+  intpn <- lapply(mser,function(ms) {
+    lmvo <- do.call(rbind,ms)
+    lmvo$n <- lmvo$n*sr;
+    # Don't select rows corresponding to excluded.steps
+    # Keep in mind that nd values are negative.
+    lmvo <- lmvo[lmvo$nd <= (lmvo$nd[1] + floor.error) & lmvo$nd >= (lmvo$nd[1] - floor.error),];
+    lmvo <- na.omit(lmvo);
+    if(any(lmvo$me==1)) {
+      return(list(prediction=NA));
+    }
+    # fit log10(me-1) against log10(n) and solve for the target enrichment
+    lmvo$n <- log10(lmvo$n); lmvo$me <- log10(lmvo$me-1)
+    emvf <- lm(me ~ n,data=lmvo);
+    tfe <- (log10(target.fold.enrichment-1)-coef(emvf)[[1]])/coef(emvf)[[2]];
+    tfen <- 10^tfe;
+    return(list(prediction=tfen,log10.fit=emvf));
+  })
+  
+  if(return.chains) {
+    return(list(interpolation=intpn,chains=msers$chains))
+  } else {
+    return(intpn);
+  }
+}
+
+
+# output binding detection results to a text file
+# the file will contain a table with each row corresponding
+# to a detected position, with the following columns:
+# chr - chromosome or target sequence
+# pos - position of detected binding site on the chromosome/sequence
+# score - a score reflecting magnitude of the binding
+# Evalue - E-value corresponding to the peak magnitude
+# FDR - FDR corresponding to the peak magnitude
+# enrichment.lb - lower bound of the fold-enrichment ratio
+# enrichment.mle - maximum likelihood estimate of the fold-enrichment ratio
+# When results$thr$type is "topN" the rows carry no Evalue/FDR values, and the
+# header written to the file omits those two columns accordingly.
+# results - detection results with per-chromosome tables in $npl
+# filename - output file (overwritten)
+# Returns (invisibly) a per-chromosome list of NULLs.
+output.binding.results <- function(results,filename) {
+  top.n <- isTRUE(results$thr$type=="topN");
+  if(top.n) {
+    header <- "chr\tpos\tscore\tenrichment.lb\tenrichment.mle";
+    cols <- c("x","y","enr","enr.mle");
+  } else {
+    header <- "chr\tpos\tscore\tEvalue\tFDR\tenrichment.lb\tenrichment.mle";
+    cols <- c("x","y","evalue","fdr","enr","enr.mle");
+  }
+  write(file=filename,header,append=F);
+  chrl <- names(results$npl); names(chrl) <- chrl;
+  invisible(lapply(chrl,function(chr) {
+    d <- results$npl[[chr]];
+    if(dim(d)[1]>0) {
+      # prepend the chromosome name to the selected columns
+      od <- cbind(rep(chr,dim(d)[1]),d[,cols,drop=FALSE]);
+      write.table(od,file=filename,col.names=F,row.names=F,sep="\t",append=T,quote=F)
+    }
+  }))
+}
+
+
+# -------- LOW-LEVEL ROUTINES  ------------
+
+# calculates tag strand cross-correlation for a range of shifts (on positive strand)
+# tags   - signed tag positions; the sign separates the two strands
+# srange - first and last shift to evaluate
+# bin    - bin size used to discretize positions and shifts
+# tt     - optional precomputed table of binned signed positions (computed from tags when NULL)
+# llim   - positions whose tag count is llim times the mean count or more are dropped (NULL disables)
+# returns a numeric vector of correlation values named by shift
+tag.scc <- function(tags,srange=c(50,250),bin=1,tt=NULL,llim=10) {
+  if(is.null(tt)) {
+    tt <- table(sign(tags)*as.integer(floor(abs(tags)/bin+0.5)));
+  }
+  if(!is.null(llim)) { l <- mean(tt); tt <- tt[tt<llim*l] }
+  tc <- as.integer(names(tt));
+  tt <- as.numeric(tt);
+
+  pti <- which(tc>0)
+  nti <- which(tc<0);
+
+  # occupied positions and their counts, separately for each strand
+  ptc <- tc[pti];
+  ntc <- (-1)*tc[nti];
+
+  ptv <- tt[pti];
+  ntv <- tt[nti];
+
+  trng <- range(c(range(ptc),range(ntc)))
+  l <- diff(trng)+1;
+  rm(tc,tt);
+
+  # center the counts on the per-position means taken over the whole range
+  mp <- sum(ptv)*bin/l;   mn <- sum(ntv)*bin/l;
+  ptv <- ptv-mp; ntv <- ntv-mn;
+  ss <- sqrt((sum(ptv*ptv)+(l-length(ptv))*mp^2) * (sum(ntv*ntv)+(l-length(ntv))*mn^2));
+
+  t.cor <- function(s) {
+    smi <- match(ptc+s,ntc);
+    hit <- !is.na(smi);
+    nmatched <- smi[hit];
+    # negative-strand positions without a positive-strand partner at this shift;
+    # ntv[-nmatched] would select nothing (instead of everything) when nmatched is empty
+    if(length(nmatched)>0) { ntv.unmatched <- ntv[-nmatched]; } else { ntv.unmatched <- ntv; }
+    return((sum(ptv[hit]*ntv[nmatched]) -
+           mn*sum(ptv[!hit]) -
+           mp*sum(ntv.unmatched) +
+           mp*mn*(l-length(ptv)-length(ntv)+length(nmatched)))/ss);
+  }
+  shifts <- floor(seq(srange[1],srange[2],by=bin)/bin+0.5);
+  scc <- unlist(lapply(shifts,t.cor)); names(scc) <- shifts*bin;
+  return(scc);
+}
+
+
+# plot tag cross-correlation
+# ac - cross-correlation profile (numeric vector named by lag), or a list of such
+#      profiles; with a list the first profile sets the axes and the reported maximum
+# min.peak.x - when given, the maximum is searched only among lags greater than this value
+# xlim - lag range to show; defaults to the range of the (first) profile
+# rmw - window passed to runmean() when drawing the profiles
+# rescale - with a list, rescale the additional profiles to the range of the first one
+# cols, ltys - colors and line types of the profiles in a list
+# plot.147, plot.grid, plot.max - draw a line at 147, a grid around the maximum, the maximum itself
+# returns the lag of the maximum when plot.max is set
+t.plotcc <- function(ac, lab=c(10,5,7), ylab="correlation", xlab="lag", pch=19, grid.i=c(-5:5), grid.s=10, type='b', plot.grid=F, cols=c(1,2,4,"orange",8,"pink"), min.peak.x=NULL, xlim=NULL, plot.147=F, plot.max=T, rmw=1, rescale=F, legendx="right", ltys=rep(1,length(ac)), ...) {
+    if(is.list(ac)) {
+      cols <- cols[1:length(ac)];
+
+      if(!is.null(xlim)) {
+        vx <- as.numeric(names(ac[[1]])); vx <- which(vx>=xlim[1] & vx<=xlim[2]);
+        ac[[1]] <- (ac[[1]])[vx];
+      } else {
+        xlim <- range(as.numeric(names(ac[[1]])));
+      }
+
+
+      plot(as.numeric(names(ac[[1]])),runmean(ac[[1]],rmw),type=type,pch=pch,xlab=xlab,ylab=ylab,lab=lab, col=cols[1], xlim=xlim, lty=ltys[1], ...);
+      if(length(ac)>1) {
+        for(i in seq(2,length(ac))) {
+          irng <- range(ac[[i]]);
+          vx <- as.numeric(names(ac[[i]])); vx <- which(vx>=xlim[1] & vx<=xlim[2]);
+          if(rescale) {
+            lines(as.numeric(names(ac[[i]])[vx]),runmean((ac[[i]][vx]-irng[1])/diff(irng)*diff(range(ac[[1]]))+min(ac[[1]]),rmw),col=cols[i],lty=ltys[i]);
+          } else {
+            lines(as.numeric(names(ac[[i]]))[vx],runmean(ac[[i]][vx],rmw),col=cols[i],lty=ltys[i]);
+          }
+        }
+      }
+      if(is.null(min.peak.x)) {
+        m <- as.numeric(names(ac[[1]])[which.max(ac[[1]])]);
+      } else {
+        sac <- (ac[[1]])[which(as.numeric(names(ac[[1]]))>min.peak.x)]
+        m <- as.numeric(names(sac)[which.max(sac)]);
+      }
+      legend(x="topright",bty="n",legend=c(names(ac)),col=cols,lty=ltys)
+    } else {
+      if(!is.null(xlim)) {
+        vx <- as.numeric(names(ac));
+        vx <- which(vx>=xlim[1] & vx<=xlim[2]);
+        ac <- ac[vx];
+      } else {
+        xlim <- range(as.numeric(names(ac)));
+      }
+      
+      plot(as.numeric(names(ac)),runmean(ac,rmw),type=type,pch=pch,xlab=xlab,ylab=ylab,lab=lab, xlim=xlim, ...);
+      if(is.null(min.peak.x)) {
+        m <- as.numeric(names(ac)[which.max(ac)]);
+      } else {
+        # lags are stored as character names; compare them as numbers, as the
+        # list branch above does, rather than as strings
+        sac <- ac[which(as.numeric(names(ac))>min.peak.x)]
+        m <- as.numeric(names(sac)[which.max(sac)]);
+      }
+    }
+    if(plot.147) {
+      abline(v=147,lty=2,col=8);
+    }
+    if(plot.grid) {
+      abline(v=m+grid.i*grid.s,lty=3,col="pink");
+    }
+    if(plot.max) {
+      abline(v=m,lty=2,col=2);
+      legend(x=legendx,bty="n",legend=c(paste("max at ",m,"bp",sep="")));
+      return(m);
+    }
+  }
+  
+  # plot chromosome-average cross-correlation
+  # ci - name of the dataset to look up in ccl and ttl
+  # ccl - list (by dataset) of per-chromosome cross-correlation profiles
+  # ttl - list (by dataset) of per-chromosome tag vectors; their lengths weight the average
+  # plot - draw the averaged profile with t.plotcc (extra arguments are passed on)
+  # return.ac - return the averaged profile instead of the value of t.plotcc
+  # a dataset with a single chromosome returns that profile as is, an empty one returns c()
+  t.plotavcc <- function(ci, main=paste(ci,"chromosome average"), ccl=tl.cc, return.ac=F, ttl=tl, plot=T, ... ) {
+    cc <- ccl[[ci]];
+    if(length(cc)==1)  { return(cc[[1]]) };
+    if(length(cc)==0) { return(c()) };
+    ac <- do.call(rbind,cc);
+    # omit NA chromosomes
+    ina <- apply(ac,1,function(d) any(is.na(d)));
+
+    tags <- ttl[[ci]]; 
+    avw <- unlist(lapply(tags,length));    avw <- avw/sum(avw);    
+    # drop=FALSE keeps ac a matrix even when a single chromosome is left
+    ac <- ac[!ina,,drop=FALSE]; avw <- avw[!ina];
+    ac <- apply(ac,2,function(x) sum(x*avw));
+    if(plot) {
+      m <- t.plotcc(ac, main=main, ...);
+      if(!return.ac) { return(m) }
+    }
+    if(return.ac) { return(ac) }
+  }
+
+  # plot the cross-correlation profile of every chromosome of dataset ci,
+  # one panel per chromosome on a grid with ncol columns
+  # ccl - list (by dataset) of per-chromosome cross-correlation profiles
+  # extra arguments are passed on to t.plotcc; returns the list of its results
+  t.plotchrcc <- function(ci,ncol=4, ccl=tl.cc, ... ) {
+    cc <- ccl[[ci]];
+    # round the row count up so that every chromosome gets a panel
+    par(mfrow = c(ceiling(length(cc)/ncol),ncol), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8)
+    lapply(names(cc),function(ch) { t.plotcc(cc[[ch]],main=paste(ci,": chr",ch,sep=""), ...) })
+  }
+
+  # overlay chromosome-averaged cross-correlation profiles of several profile sets
+  # ccl[[ci]] is a list of profile sets, each a list of per-chromosome profiles;
+  # rtl[[ci]] holds the per-chromosome tag vectors whose lengths weight the average
+  t.plotavccl <- function(ci, ccl=tl.ccl, main=paste(ci,"chromosome average"), rtl=tl, ... ) {
+    profile.sets <- ccl[[ci]];
+    chrs <- names(profile.sets[[1]]); names(chrs) <- chrs;
+    # one matrix per profile set, chromosomes in rows
+    acl <- lapply(profile.sets,function(x) do.call(rbind,x));
+    # chromosome weights proportional to the number of tags
+    ntags <- unlist(lapply(rtl[[ci]][chrs],length));
+    avw <- ntags/sum(ntags);
+    acl <- lapply(acl,function(m) {
+      apply(m,2,function(v) sum(v*avw))
+    })
+    t.plotcc(acl, main=main, ...);
+  }
+  
+  # plot, for every chromosome of dataset ci, the overlaid profiles of all profile sets,
+  # one panel per chromosome on a grid with ncol columns
+  # ccl[[ci]] is a list of profile sets, each a list of per-chromosome profiles
+  # extra arguments are passed on to t.plotcc; returns the list of its results
+  t.plotchrccl <- function(ci,ccl=tl.ccl,ncol=4, ... ) {
+    # profile sets of the requested dataset
+    cc <- ccl[[ci]];
+    # round the row count up so that every chromosome gets a panel
+    par(mfrow = c(ceiling(length(cc[[1]])/ncol),ncol), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8)
+    lapply(names(cc[[1]]),function(ch) { t.plotcc(lapply(cc,function(x) x[[ch]]),main=paste(ci,": chr",ch,sep=""), ...) })
+  }
+
+  
+
+# compute strand cross-correlation for every element of tl (on a cluster when one
+# is given) and plot the chromosome average over the srange shifts
+show.scc <- function(tl,srange,cluster=NULL) {
+  if(is.null(cluster)) {
+    cc <- lapply(tl,tag.scc,srange=srange);
+  } else {
+    cc <- clusterApplyLB(cluster,tl,tag.scc,srange=srange);
+    names(cc) <- names(tl); 
+  }
+  par(mfrow = c(1,1), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8);
+  # wrap profiles and tags as a single dataset called "sample"
+  ccl <- list(sample=cc);
+  ttl <- list(sample=tl);
+  averaged <- lapply(names(ccl),t.plotavcc,type='l',ccl=ccl,xlim=srange,return.ac=F,ttl=ttl,main="");
+  invisible(averaged[[1]])
+}
+
+# find regions of significant tag enrichment
+# runs tag.enrichment.clusters on every chromosome of signal.data against the same
+# chromosome of control.data and widens each reported region by half of
+# masking.window.size on both sides; the per-chromosome list is returned invisibly
+find.significantly.enriched.regions <- function(signal.data,control.data,window.size=500,multiplier=1,z.thr=3,mcs=0,debug=F,background.density.scaling=T,masking.window.size=window.size,poisson.z=0,poisson.ratio=4,either=F,tag.shift=146/2,bg.weight=NULL) {
+  # derive the background weight from the two datasets unless one was supplied
+  if(is.null(bg.weight)) {
+    bg.weight <- dataset.density.ratio(signal.data,control.data,background.density.scaling=background.density.scaling);
+  }
+  if(debug) { cat("bg.weight=",bg.weight,"\n"); }
+
+  chrl <- names(signal.data); names(chrl) <- chrl; 
+  tec <- lapply(chrl,function(chr) {
+    regions <- tag.enrichment.clusters(signal.data[[chr]],control.data[[chr]],bg.weight=bg.weight*multiplier,thr=z.thr,wsize=window.size,mcs=mcs,min.tag.count.z=poisson.z,min.tag.count.ratio=poisson.ratio,either=either,tag.shift=tag.shift);
+    pad <- masking.window.size/2;
+    regions$s <- regions$s-pad;
+    regions$e <- regions$e+pad;
+    regions
+  })
+}
+
+
+# given tag position vectors, find contigs of significant enrichment of signal over background
+# signal, background - tag position vectors (only abs() of the shifted values is used below)
+# wsize - window size handed to the C routine
+# thr - z score threshold
+# mcs - minimal cluster size
+# bg.weight - fraction by which background counts should be multiplied
+# min.tag.count.z will impose a poisson constraint based on randomized signal in parallel of background constraint (0 - no constraint)
+# min.tag.count.ratio - multiplier of the expected per-window tag count used for that constraint
+# tag.av.den - average tag density; estimated from the signal when NULL
+# tag.shift - offset added to the positions before abs() is taken
+# either - converted to an integer flag for the C routine
+# NOTE(review): min.tag.count.thr is reassigned in both branches below, so a value
+# supplied by the caller never reaches the C routine - confirm this is intended
+# returns the value of the find_poisson_enrichment_clusters C routine
+tag.enrichment.clusters <- function(signal,background,wsize=200,thr=3,mcs=1,bg.weight=1,min.tag.count.z=0,tag.av.den=NULL,min.tag.count.thr=0,min.tag.count.ratio=4,either=F,tag.shift=146/2) {
+  if(is.null(tag.av.den)) {
+    # tags per position over the span covered by the signal
+    tag.av.den <- length(signal)/diff(range(abs(signal)));
+  }
+  if(min.tag.count.z>0) {
+    # poisson quantile of the per-window count matching the upper tail of the given z score
+    min.tag.count.thr <- qpois(pnorm(min.tag.count.z,lower.tail=F),min.tag.count.ratio*tag.av.den*wsize,lower.tail=F)
+  } else {
+    min.tag.count.thr <- 0;
+  }
+  
+  #if(bg.weight!=1) {
+  #  background <- sample(background,length(background)*(bg.weight),replace=T);
+  #}
+  # make up combined position, flag vectors
+  # (flag 1 marks signal tags, 0 background tags; both are sorted by position)
+  pv <- abs(c(signal,background)+tag.shift);
+  fv <- c(rep(1,length(signal)),rep(0,length(background)));
+  po <- order(pv);
+  pv <- pv[po];
+  fv <- fv[po];
+
+  #thr <- pnorm(thr,lower.tail=F);
+  
+  # coerce every argument to the storage mode set for the .Call below
+  storage.mode(wsize) <- storage.mode(mcs) <- storage.mode(fv) <- "integer";
+  storage.mode(thr) <- storage.mode(pv) <- "double";
+  storage.mode(bg.weight) <- "double";
+  storage.mode(min.tag.count.thr) <- "double";
+  either <- as.integer(either);
+  storage.mode(either) <- "integer";
+  
+  z <- .Call("find_poisson_enrichment_clusters",pv,fv,wsize,thr,mcs,bg.weight,min.tag.count.thr,either)
+  return(z);
+}
+
+
+
+
+
+# estimates threshold, calculates predictions on complete data and randomized data
+# input: tvl - list of tag vectors by chromosome
+# control - a list of control tag datasets
+# no randomization is done if control is supplied
+# e.value, fdr - target E-value or FDR used to pick the threshold (fdr is used when e.value is NULL)
+# topN - use min threshold to do a run, return topN peaks from entire genome
+# threshold - specify a user-defined threshold
+# bg.tl - background tag list; with tec.filter it is also used to find regions that
+#         are excluded from the predictions
+# return.control.predictions, return.core.data - add control predictions or
+#         intermediate data to the returned list
+# extra arguments are passed on to window.call.mirror.binding
+# chrl, predict.on.random, return.rtp and print.level are not used by this function
+# returns a list with npl (predictions by chromosome) and thr (the threshold used)
+lwcc.prediction <- function(tvl,e.value=NULL, fdr=0.01, chrl=names(tvl), min.thr=0, n.randomizations=1, shuffle.window=1, debug=T, predict.on.random=F, shuffle.both.strands=T,strand.shuffle.only=F, return.rtp=F, control=NULL, print.level=0, threshold=NULL, topN=NULL, bg.tl=NULL, tec.filter=T, tec.window.size=1e3,tec.z=3, tec.masking.window.size=tec.window.size, tec.poisson.z=3,tec.poisson.ratio=4, bg.reverse=T, return.control.predictions=F, return.core.data=F, background.density.scaling=T, ... ) {
+
+  control.predictions <- NULL;
+  core.data <- list();
+
+  # tec (the exclusion regions) exists only when a background is given and filtering is on
+  if(!is.null(bg.tl) & tec.filter) {
+    if(debug) { cat("finding background exclusion regions ... "); }
+    tec <- find.significantly.enriched.regions(bg.tl,tvl,window.size=tec.window.size,z.thr=tec.z,masking.window.size=tec.masking.window.size,poisson.z=tec.poisson.z,poisson.ratio=tec.poisson.ratio,background.density.scaling=background.density.scaling,either=T);
+    if(return.core.data) {
+      core.data <- c(core.data,list(tec=tec));
+    }
+    if(debug) { cat("done\n"); }
+  }
+
+  
+  if(is.null(threshold) & is.null(topN)) { # threshold determination is needed
+    # generate control predictions
+    if(!is.null(control)) {
+      if(debug) { cat("determining peaks on provided",length(control),"control datasets:\n");   }
+      if(!is.null(bg.tl)) {
+        if(bg.reverse) {
+          if(debug) { cat("using reversed signal for FDR calculations\n"); }
+          rbg.tl <- tvl;
+        } else {
+          if(debug) { cat("generating randomized (within chromosome) background ... "); }
+          rbg.tl <- lapply(bg.tl,function(d) {
+            if(length(d)<2) { return(d); }
+            rng <- range(abs(d));
+            rd <- round(runif(length(d),rng[1],rng[2]));
+            nrd <- sample(1:length(rd),length(which(d<0)));
+            rd[nrd] <- rd[nrd]*(-1);
+            return(rd);
+          })
+          if(debug) { cat("done\n"); }
+        }
+      } else {
+        rbg.tl <- NULL;
+      }
+      n.randomizations <- length(control);
+      #signal.size <- sum(unlist(lapply(tvl,length)));
+      rtp <- lapply(control,function(d) {
+        # calculate tag.weight
+        #tag.weight <- sum(unlist(lapply(tvl,length)))/sum(unlist(lapply(d,length)));
+        tag.weight <- dataset.density.ratio(tvl,d,background.density.scaling=background.density.scaling);
+        #cat("tag.weight=",tag.weight," ");
+        return(window.call.mirror.binding(d,min.thr=min.thr, tag.weight=tag.weight,bg.tl=rbg.tl, debug=debug, round.up=T,background.density.scaling=background.density.scaling, ...));
+        #return(window.call.mirror.binding(d,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist,cluster=cluster))
+      });
+      if(return.core.data) {
+        core.data <- c(core.data,list(rtp.unfiltered=rtp));
+      }
+      # same condition as the one under which tec was computed above; testing
+      # tec.filter alone would reference an undefined tec when bg.tl is NULL
+      if(!is.null(bg.tl) & tec.filter) {
+        if(debug) { cat("excluding systematic background anomalies ... "); }
+        rtp <- lapply(rtp,filter.binding.sites,tec,exclude=T);
+        if(debug) { cat("done\n"); }
+      }
+    } else {
+      if(debug) { cat("determining peaks on ",n.randomizations,"randomized datasets:\n");   }
+      rtp <- lapply(1:n.randomizations,function(i) {
+        rd <- generate.randomized.data(tvl,shuffle.window=shuffle.window,shuffle.both.strands=shuffle.both.strands,strand.shuffle.only=strand.shuffle.only);
+        return(window.call.mirror.binding(rd,min.thr=min.thr,bg.tl=bg.tl, debug=debug, ...));
+        #return(window.call.mirror.binding(rd,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist))
+      });
+    }
+    if(return.control.predictions) {
+      control.predictions <- rtp;
+    } 
+    rtp <- do.call(rbind,lapply(rtp,function(d) do.call(rbind,d))); # merge tables
+    
+    # generate real data predictions
+    if(debug) { cat("determining peaks on real data:\n");   }
+    npl <- window.call.mirror.binding(tvl,min.thr=min.thr,bg.tl=bg.tl, debug=debug, background.density.scaling=background.density.scaling, ...);
+    #npl <- window.call.mirror.binding(tvl,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist,cluster=cluster);
+    if(return.core.data) {
+      core.data <- c(core.data,list(npl.unfiltered=npl));
+    }
+
+    if(!is.null(bg.tl) & tec.filter) {
+      if(debug) { cat("excluding systematic background anomalies ... "); }
+      npl <- filter.binding.sites(npl,tec,exclude=T);
+      if(debug) { cat("done\n"); }
+    }
+
+    # calculate E-value and FDRs for all of the peaks
+    if(debug) { cat("calculating statistical thresholds\n"); }
+    chrl <- names(npl); names(chrl) <- chrl;
+    # && short-circuits, so a NULL table is skipped instead of breaking the if()
+    npld <- do.call(rbind,lapply(names(npl),function(chr) { k <- npl[[chr]]; if(!is.null(k) && dim(k)[1]>0) { k$chr <- rep(chr,dim(k)[1]) }; return(k) }))
+    npld <- cbind(npld,get.eval.fdr.vectors(npld$y,rtp$y));
+    # correct for n.randomizations
+    npld$fdr <- npld$fdr/n.randomizations;
+    npld$evalue <- npld$evalue/n.randomizations;
+
+    if(return.core.data) {
+      core.data <- c(core.data,list(npld=npld));
+    }
+
+    # determine actual thresholds
+    if(is.null(e.value)) {
+      if(is.null(fdr)) { fdr <- 0.01; }
+      thr <- list(root=min(npld$y[npld$fdr<=fdr]),type="FDR",fdr=fdr)
+      if(debug) { cat("FDR",fdr,"threshold=",thr$root,"\n");  }
+    } else {
+      # determine threshold based on e-value
+      thr <- list(root=min(npld$y[npld$evalue<=e.value]),type="Evalue",e.value=e.value)
+      if(debug) { cat("E-value",e.value,"threshold=",thr$root,"\n");  }
+    }
+
+
+    # keep the peaks passing the threshold and split them back by chromosome
+    npld <- npld[npld$y>=thr$root,];
+    if(dim(npld)[1]>0) {
+      npl <- tapply(c(1:dim(npld)[1]),as.factor(npld$chr),function(ii) {df <- npld[ii,]; df$chr <- NULL; return(df) });
+    } else {
+      npl <- list();
+    }
+  } else {
+    if(is.null(threshold)) {
+      thr <- list(root=min.thr,type="minimal");
+    } else {
+      thr <- list(root=threshold,type="user specified");
+    }
+
+    cat("calling binding positions using",thr$type,"threshold (",thr$root,") :\n");
+    npl <- window.call.mirror.binding(tvl=tvl,min.thr=thr$root,bg.tl=bg.tl, debug=debug, ...);
+    if(!is.null(bg.tl) & tec.filter) {
+      if(debug) { cat("excluding systematic background anomalies ... "); }
+      npl <- filter.binding.sites(npl,tec,exclude=T);
+      if(debug) { cat("done\n"); }
+    }
+
+    if(!is.null(topN)) {
+      # determine threshold based on topN peaks
+      ay <- unlist(lapply(npl,function(d) d$y));
+      if(length(ay)>topN) {
+        thr <- list(root=sort(ay,decreasing=T)[topN],type="topN",topN=topN);
+        cat(paste("determined topN threshold :",thr$root,"\n"));      
+        npl <- lapply(npl,function(d) d[d$y>thr$root,]);
+      }
+    }
+  }
+
+  if(return.core.data) {
+    return(c(list(npl=npl,thr=thr),core.data));
+  }
+  if(return.control.predictions & !is.null(control.predictions)) {
+    return(list(npl=npl,thr=thr,control.predictions=control.predictions));
+  }
+  return(list(npl=npl,thr=thr));
+}
+
+# window tag difference method
+# x, y - tag positions of the two strands (tag.wtd passes positive-strand positions
+#        as x and sign-flipped negative-strand positions as y)
+# s, e - start and end of the region; count vectors cover s-whs .. e+whs
+# whs, bg.whs - window sizes handed to the C routine for signal and background
+#        (presumably window half-sizes - TODO confirm against the C code)
+# step - when >1, all positions and sizes are divided by step and rounded
+# bg.x, bg.y - optional background tag positions; enable background subtraction
+# mask.x, mask.y - optional masked positions, recorded as -1 counts unless ignore.masking
+# tag.weight, bg.weight, min.thr, min.dist, direct.count, round.up - handed to the C routine
+# returns a data frame of peaks (x, y) when return.peaks is set,
+# otherwise list(x=covered range, y=value of the C routine)
+wtd <- function(x,y,s,e,whs=200,return.peaks=T,min.thr=5,min.dist=200,step=1,direct.count=F,tag.weight=1,bg.x=NULL,bg.y=NULL,bg.weight=1,mask.x=NULL,mask.y=NULL,ignore.masking=F, bg.whs=whs, round.up=F, ...) {
+  # masking is also skipped when no mask positions were supplied at all
+  ignore.masking <- ignore.masking | (is.null(mask.x) & is.null(mask.y));
+  if(step>1) {
+    # bring positions and sizes onto the coarser grid
+    x <- floor(x/step+0.5); y <- floor(y/step+0.5)
+    
+    if(!is.null(bg.x)) {
+      bg.x <- floor(bg.x/step+0.5); bg.y <- floor(bg.y/step+0.5)  
+    }
+    
+    if(!is.null(mask.x)) {
+      mask.x <- floor(mask.x/step+0.5); mask.y <- floor(mask.y/step+0.5)  
+    }
+
+    
+    whs <- floor(whs/step+0.5);
+    bg.whs <- floor(bg.whs/step+0.5);
+    min.dist <- floor(min.dist/step +0.5);
+    s <- floor(s/step+0.5)
+    e <- floor(e/step+0.5)
+  }
+
+  # scale bg.weight, since within calculation they are considered independent
+  bg.weight <- bg.weight*tag.weight;
+
+  # range covered by the count vectors
+  rx <- c(s-whs,e+whs);
+
+  # compile tag vectors
+  # (per-position tag counts, indexed relative to rx[1])
+  xt <- table(x);
+  xh <- integer(diff(rx)+1);
+  xh[as.integer(names(xt))-rx[1]+1] <- as.integer(xt);
+
+  yt <- table(y);
+  yh <- integer(diff(rx)+1);
+  yh[as.integer(names(yt))-rx[1]+1] <- as.integer(yt);
+
+  # compile background vectors
+  if(!is.null(bg.x) & length(bg.x)>0) {
+    bg.subtract <- 1;
+
+    bg.xt <- table(bg.x);
+    bg.xh <- integer(diff(rx)+1);
+    bg.xh[as.integer(names(bg.xt))-rx[1]+1] <- as.integer(bg.xt);
+    rm(bg.xt);
+
+    bg.yt <- table(bg.y);
+    bg.yh <- integer(diff(rx)+1);
+    bg.yh[as.integer(names(bg.yt))-rx[1]+1] <- as.integer(bg.yt);
+    rm(bg.yt);
+
+    # adjust bg.weight according to bg.whs
+    if(bg.whs!=whs) {
+      bg.weight <- bg.weight*whs/bg.whs;
+    }
+  } else {
+    bg.subtract <- 0;
+    bg.xh <- bg.yh <- c();
+  }
+
+  # record masked positions
+  # (masked positions that carry no tags and fall inside rx get a count of -1)
+  if(!ignore.masking) {
+    if(!is.null(mask.x) & length(mask.x)>0) {
+      mvx <- unique(mask.x); mvx <- setdiff(mvx,as.numeric(names(xt)));
+      mvx <- mvx[mvx>=rx[1] & mvx<=rx[2]];
+      xh[mvx-rx[1]+1] <- -1;
+    }
+
+    if(!is.null(mask.y) & length(mask.y)>0) {
+      mvy <- unique(mask.y); mvy <- setdiff(mvy,as.numeric(names(yt)));
+      mvy <- mvy[mvy>=rx[1] & mvy<=rx[2]];
+      yh[mvy-rx[1]+1] <- -1;
+    }
+  }
+
+  rm(xt,yt);
+
+  if(round.up) { round.up <- 1; } else { round.up <- 0; }
+  
+  # coerce every argument to the storage mode set for the .Call below
+  storage.mode(xh) <- storage.mode(yh) <- "integer";
+  storage.mode(bg.xh) <- storage.mode(bg.yh) <- "integer";
+  nx <- length(xh);   storage.mode(nx) <- storage.mode(whs) <- storage.mode(bg.whs) <- "integer";
+  rp <- as.integer(return.peaks);
+  dcon <- as.integer(direct.count);
+  storage.mode(rp) <- storage.mode(min.dist) <- "integer";
+  storage.mode(min.thr) <- "double";
+  storage.mode(dcon) <- "integer";
+  storage.mode(tag.weight) <- "double";
+  storage.mode(bg.weight) <- "double";
+  storage.mode(bg.subtract) <- "integer";
+  storage.mode(round.up) <- "integer";
+  im <- as.integer(ignore.masking);
+  storage.mode(im) <- "integer";
+  z <- .Call("wtd",xh,yh,whs,rp,min.dist,min.thr,dcon,tag.weight,im,bg.subtract,bg.xh,bg.yh,bg.whs,bg.weight,round.up);
+  if(return.peaks) {
+    # shift peak positions by rx[1] and undo the step scaling
+    return(data.frame(x=(z$x+rx[1])*step,y=z$v));
+  } else {
+    return(list(x=rx*step,y=z));
+  }
+}
+
+
+# run wtd() on the tags of a signed tag vector that fall within [s,e]
+# ctv, bg.ctv, mask.ctv - signed signal, background and mask tag vectors
+# without tags on either strand an empty data frame is returned (or, when
+# return.peaks is off, a zero profile over the range of the tags found)
+tag.wtd <- function(ctv,s,e,return.peaks=T, bg.ctv=NULL,  mask.ctv=NULL, ...) {
+  # positive-strand positions, and negative-strand positions with the sign removed
+  pos.strand <- function(v) v[v>=s & v<=e];
+  neg.strand <- function(v) (-1)*v[v<=-s & v>=-e];
+
+  x <- pos.strand(ctv);
+  y <- neg.strand(ctv);
+
+  bg.x <- bg.y <- NULL;
+  if(!is.null(bg.ctv)) {
+    bg.x <- pos.strand(bg.ctv);
+    bg.y <- neg.strand(bg.ctv);
+  }
+
+  mask.x <- mask.y <- NULL;
+  if(!is.null(mask.ctv)) {
+    mask.x <- pos.strand(mask.ctv);
+    mask.y <- neg.strand(mask.ctv);
+  }
+
+  if(length(x)>0 & length(y)>0) {
+    return(wtd(x,y,s,e,return.peaks=return.peaks,  bg.x=bg.x,bg.y=bg.y, mask.x=mask.x,mask.y=mask.y, ...))
+  }
+  if(return.peaks) {
+    return(data.frame(x=c(),y=c()));
+  }
+  rx <- range(c(x,y));
+  return(list(x=rx,y=numeric(diff(rx)+1)));
+}
+
+# shuffles tags in chromosome blocks of a specified size
+# note: all coordinates should be positive
+# window.size 0 draws a new position for every tag, 1 draws a new position for
+# every distinct tag position, larger values move whole blocks of that size
+# the input is returned unchanged (with a warning) when there are fewer than
+# three tags or when the tags span no more than window.size
+tag.block.shuffle <- function(tags,window.size=100) {
+  ntags <- length(tags);
+  if(ntags<3) {
+    warning("too few tags for shuffling");
+    return(tags);
+  }
+  span <- range(tags);
+  if(diff(span)<=window.size) {
+    warning(paste("tag range (",diff(span),") is smaller than shuffle window size"));
+    return(tags);
+  }
+
+  if(window.size==0) {
+    return(as.integer(runif(ntags,min=span[1],max=span[2])))
+  }
+  if(window.size==1) {
+    # tags sharing a position stay together
+    counts <- table(tags);
+    return(rep(runif(length(counts),min=span[1],max=span[2]),as.integer(counts)))
+  }
+
+  # block index of every tag and its offset inside the block
+  block <- tags %/% window.size;
+  offset <- tags %% window.size;
+
+  # draw a new block index for every occupied block
+  occupied <- unique(block);
+  block.span <- range(occupied);
+  relocated <- as.integer(runif(length(occupied),min=block.span[1],max=block.span[2]));
+  new.block <- relocated[match(block,occupied)];
+  return(new.block*window.size+offset);
+}
+
+
+# calculate window cross-correlation
+# x, y - tag positions of the two strands (tag.lwcc passes positive-strand positions
+#        as x and sign-flipped negative-strand positions as y)
+# s, e - start and end of the region; count vectors cover s-whs .. e+whs
+# whs, isize, bg.whs - window parameters handed to the C routine
+#        (their exact meaning is defined there - TODO confirm)
+# step - when >1, all positions and sizes are divided by step and rounded
+# bg.x, bg.y - optional background tag positions; enable background subtraction
+# mask.x, mask.y - optional masked positions, recorded as -1 counts
+# tag.weight, bg.weight, min.thr, min.dist, round.up - handed to the C routine
+# NOTE(review): with the default bg.weight=NULL the product bg.weight*tag.weight
+# below is a zero-length vector - confirm the C routine copes with that
+# returns a data frame of peaks (x, y) when return.peaks is set,
+# otherwise list(x=covered range, y=value of the C routine)
+lwcc <- function(x,y,s,e,whs=100,isize=20,return.peaks=T,min.thr=1,min.dist=100,step=1,tag.weight=1,bg.x=NULL,bg.y=NULL,bg.weight=NULL,mask.x=NULL,mask.y=NULL,bg.whs=whs,round.up=F) {
+  if(step>1) {
+    # bring positions and sizes onto the coarser grid
+    x <- floor(x/step+0.5); y <- floor(y/step+0.5)
+    
+    if(!is.null(bg.x)) {
+      bg.x <- floor(bg.x/step+0.5); bg.y <- floor(bg.y/step+0.5)  
+    }
+    
+    if(!is.null(mask.x)) {
+      mask.x <- floor(mask.x/step+0.5); mask.y <- floor(mask.y/step+0.5)  
+    }
+
+    whs <- floor(whs/step+0.5);
+    bg.whs <- floor(bg.whs/step+0.5);
+    isize <- floor(isize/step+0.5);
+    min.dist <- floor(min.dist/step +0.5);
+    s <- floor(s/step+0.5)
+    e <- floor(e/step+0.5)
+  }
+
+  # scale bg.weight, since within calculation they are considered independent
+  bg.weight <- bg.weight*tag.weight;
+
+  
+  # range covered by the count vectors; counts are indexed relative to rx[1]
+  rx <- c(s-whs,e+whs);
+  xt <- table(x);
+  xh <- integer(diff(rx)+1);
+  xh[as.integer(names(xt))-rx[1]+1] <- as.integer(xt);
+
+  yt <- table(y);
+  
+  yh <- integer(diff(rx)+1);
+  yh[as.integer(names(yt))-rx[1]+1] <- as.integer(yt);
+
+  # compile background vectors
+  if(!is.null(bg.x) & length(bg.x)>0) {
+    bg.subtract <- 1;
+
+    bg.xt <- table(bg.x);
+    bg.xh <- integer(diff(rx)+1);
+    bg.xh[as.integer(names(bg.xt))-rx[1]+1] <- as.integer(bg.xt);
+    rm(bg.xt);
+
+    bg.yt <- table(bg.y);
+    bg.yh <- integer(diff(rx)+1);
+    bg.yh[as.integer(names(bg.yt))-rx[1]+1] <- as.integer(bg.yt);
+    rm(bg.yt);
+
+    # adjust bg.weight according to bg.whs
+    bg.weight <- bg.weight*(whs-isize)/bg.whs;
+  } else {
+    bg.subtract <- 0;
+    bg.xh <- bg.yh <- c();
+  }
+
+  # record masked positions
+  # (masked positions that carry no tags and fall inside rx get a count of -1)
+  if(!is.null(mask.x) & length(mask.x)>0) {
+    mvx <- unique(mask.x); mvx <- setdiff(mvx,as.numeric(names(xt)));
+    mvx <- mvx[mvx>=rx[1] & mvx<=rx[2]];
+    
+    xh[mvx-rx[1]+1] <- -1;
+  }
+
+  if(!is.null(mask.y) & length(mask.y)>0) {
+    mvy <- unique(mask.y); mvy <- setdiff(mvy,as.numeric(names(yt)));
+    mvy <- mvy[mvy>=rx[1] & mvy<=rx[2]];
+    yh[mvy-rx[1]+1] <- -1;
+  } 
+  
+  rm(xt,yt);
+  if(round.up) { round.up <- 1; } else { round.up <- 0; }
+  
+  # coerce every argument to the storage mode set for the .Call below
+  storage.mode(xh) <- storage.mode(yh) <- "integer";
+  storage.mode(bg.xh) <- storage.mode(bg.yh) <- "integer";
+  nx <- length(xh);   storage.mode(nx) <- storage.mode(whs) <- storage.mode(isize) <- storage.mode(bg.whs) <- "integer";
+  rp <- as.integer(return.peaks);
+  storage.mode(rp) <- storage.mode(min.dist) <- "integer";
+  storage.mode(min.thr) <- "double";
+  storage.mode(tag.weight) <- "double";
+  storage.mode(bg.weight) <- "double";
+  storage.mode(bg.subtract) <- "integer";
+  storage.mode(round.up) <- "integer";
+
+  # allocate return arrays
+  #cc <- numeric(nx); storage.mode(cc) <- "double";
+  z <- .Call("lwcc",xh,yh,whs,isize,rp,min.dist,min.thr,tag.weight,bg.subtract,bg.xh,bg.yh,bg.whs,bg.weight,round.up);
+  if(return.peaks) {
+    # shift peak positions by rx[1] and undo the step scaling
+    return(data.frame(x=(z$x+rx[1])*step,y=z$v));
+  } else {
+    return(list(x=rx*step,y=z));
+  }
+}
+
+
+# run lwcc() on the tags of a signed tag vector that fall within [s,e]
+# ctv, bg.ctv, mask.ctv - signed signal, background and mask tag vectors
+# without tags on either strand an empty data frame is returned (or, when
+# return.peaks is off, a zero profile over the range of the tags found)
+tag.lwcc <- function(ctv,s,e,return.peaks=T, bg.ctv=NULL, mask.ctv=NULL, ...) {
+  # positive-strand positions, and negative-strand positions with the sign removed
+  within.pos <- function(v) v[v>=s & v<=e];
+  within.neg <- function(v) (-1)*v[v<=-s & v>=-e];
+
+  x <- within.pos(ctv);
+  y <- within.neg(ctv);
+
+  if(is.null(bg.ctv)) {
+    bg.x <- bg.y <- NULL;
+  } else {
+    bg.x <- within.pos(bg.ctv);
+    bg.y <- within.neg(bg.ctv);
+  }
+
+  if(is.null(mask.ctv)) {
+    mask.x <- mask.y <- NULL;
+  } else {
+    mask.x <- within.pos(mask.ctv);
+    mask.y <- within.neg(mask.ctv);
+  }
+
+  no.tags <- length(x)==0 | length(y) ==0;
+  if(!no.tags) {
+    return(lwcc(x,y, s,e,return.peaks=return.peaks, bg.x=bg.x,bg.y=bg.y,  mask.x=mask.x,mask.y=mask.y, ...))
+  }
+  if(return.peaks) {
+    return(data.frame(x=c(),y=c()));
+  }
+  rx <- range(c(x,y));
+  return(list(x=rx,y=numeric(diff(rx)+1)));
+}
+
+# determine mirror-based binding positions using sliding window along each chromosome
+# tvl, bg.tl, mask.tl - signal, background and mask tag lists by chromosome
+# cluster - when given, chromosomes are processed through clusterApplyLB
+# extra parameters are passed on to window.chr.call.mirror.binding()
+# returns a list of per-chromosome results named by chromosome
+window.call.mirror.binding <- function(tvl,window.size=4e7, debug=T, cluster=NULL, bg.tl=NULL, mask.tl=NULL, background.density.scaling=T, ...) {
+  chrl <- names(tvl);
+  # determine bg.weight
+  bg.weight <- NULL;
+  if(!is.null(bg.tl)) {
+    bg.weight <- dataset.density.ratio(tvl,bg.tl,background.density.scaling=background.density.scaling);
+  }
+  if(debug) { cat("bg.weight=",bg.weight," "); }
+
+  names(chrl) <- chrl;
+
+  # signal, background and mask tags of a single chromosome
+  chr.tags <- function(chr) {
+    bg.ctv <- if(is.null(bg.tl)) NULL else bg.tl[[chr]];
+    mask.ctv <- if(is.null(mask.tl)) NULL else mask.tl[[chr]];
+    list(ctv=tvl[[chr]],bg.ctv=bg.ctv,mask.ctv=mask.ctv)
+  }
+
+  if(!is.null(cluster)) {
+    # add bg.ctv and mask.ctv to parallel call
+    tvll <- lapply(chrl,chr.tags);
+    bl <- clusterApplyLB(cluster,tvll,window.chr.call.mirror.binding,window.size=window.size,debug=debug, bg.weight=bg.weight, ...);
+    names(bl) <- chrl;
+    return(bl);
+  }
+
+  return(lapply(chrl,function(chr) {
+    ct <- chr.tags(chr);
+    window.chr.call.mirror.binding(ct,window.size=window.size,chr=chr,debug=debug, bg.weight=bg.weight, bg.ctv=ct$bg.ctv, mask.ctv=ct$mask.ctv, ...);
+  }));
+}
+
+# call binding positions on one chromosome, window by window
+# ctvl - list with ctv (signal tags), bg.ctv (background tags) and mask.ctv (mask tags);
+#        its entries override the bg.ctv and mask.ctv arguments
+# window.size - size of the windows the covered range is split into
+# method - function called for every window (with s, e, ctv, bg.ctv, mask.ctv and
+#          the extra arguments); it returns the positions found in that window
+# chr - chromosome name, used in the progress output only
+# returns a data frame with columns x and y; the empty data.frame(x=c(),y=c())
+# when there are fewer than two tags or no window yields a position
+window.chr.call.mirror.binding <- function(ctvl,window.size,debug=T, chr="NA", cluster=NULL, method=tag.wtd, bg.ctv=NULL, mask.ctv=NULL, ...) {
+  ctv <- ctvl$ctv; bg.ctv <- ctvl$bg.ctv; mask.ctv <- ctvl$mask.ctv;
+  if(is.null(ctv)) { return(data.frame(x=c(),y=c())) }
+  if(length(ctv)<2) { return(data.frame(x=c(),y=c())) }
+  
+  dr <- range(unlist(lapply(ctv,function(x) range(abs(x)))))
+  # at least one window, also when all tags share a single position
+  n.windows <- max(1,ceiling(diff(dr)/window.size));
+  
+  if(debug) {
+    cat(paste("processing ",chr," in ",n.windows," steps [",sep=""));
+  }
+  # collect the per-window results and merge them once at the end
+  wl <- vector("list",n.windows);
+  for(i in seq_len(n.windows)) {
+    s <- dr[1]+(i-1)*window.size;
+    npn <- method(s=s, e=s+window.size,ctv=ctv, return.peaks=T, bg.ctv=bg.ctv, mask.ctv=mask.ctv, ... );
+    if(length(npn) > 0) { wl[[i]] <- npn  }
+    if(debug) {
+      cat(".");
+    }
+  }
+  # stays NULL when no window produced any position
+  pinfo <- do.call(rbind,wl);
+  if(is.null(pinfo)) { npos <- 0; } else { npos <- dim(pinfo)[1]; }
+  if(debug) {
+    cat(paste("] done (",npos,"positions)\n"));
+  } else {
+    cat(".");
+  }
+  # indexing NULL by column would fail, so return the empty result explicitly
+  if(is.null(pinfo)) { return(data.frame(x=c(),y=c())) }
+  return(data.frame(x=pinfo[,1],y=pinfo[,2]));
+}
+
+# generate a randomized copy of a tag dataset
+# data - list of signed tag vectors by chromosome
+# chrl - chromosomes to randomize
+# strand.shuffle.only - keep the positions and redraw only the strand (sign) of every tag
+# shuffle.window - window size passed to tag.block.shuffle
+# shuffle.both.strands - shuffle negative-strand tags as well as positive-strand tags
+# the randomized list is returned invisibly
+generate.randomized.data <- function(data,shuffle.window=1,shuffle.both.strands=T,strand.shuffle.only=F,chrl=names(data)) {
+  names(chrl) <- unlist(chrl);
+  if(strand.shuffle.only) {
+    # shuffle just strand assignment, not tag positions
+    rt <- lapply(data[unlist(chrl)],function(tv) tv*sample(c(-1,1),length(tv),replace=T));
+  } else {
+    rt <- lapply(data[unlist(chrl)],function(tv) {
+      # split the strands with a logical mask: tv[-which(tv>0)] selects nothing
+      # (instead of everything) when there are no positive-strand tags
+      pos <- !is.na(tv) & tv>0;
+      ptags <- tag.block.shuffle(tv[pos],window.size=shuffle.window);
+      ntags <- tv[!pos];
+      if(shuffle.both.strands) {
+        ntags <- tag.block.shuffle(ntags,window.size=shuffle.window);
+      }
+      return(c(ptags,ntags));
+    });
+  }
+  return(invisible(rt));
+}
+
+# determine threshold based on E value
+# for efficiency chrl should include just one or two small chromosomes
+# optional parameters are passed to window.call.mirror.binding()
+# tvl - list of signed tag vectors by chromosome
+# e.value - target E value for the whole dataset; it is scaled by the number of
+#           randomizations and by the fraction of tags found on chrl
+# control - list of control datasets used in place of randomized data
+# strand.shuffle - redraw only the strand of every tag instead of shuffling positions
+# shuffle.window, shuffle.both.strands - passed on to the tag.block.shuffle based shuffling
+# return.rtp - return the peaks found on randomized/control data instead of a threshold
+# returns a list whose root element is the threshold (the value of uniroot(), or
+# list(root=) holding the lowest point examined when the bracketing search runs out of iterations)
+determine.lwcc.threshold <- function(tvl,chrl=names(tvl),e.value=100, n.randomizations=1, min.thr=1, debug=F, tol=1e-2, shuffle.window=1, shuffle.both.strands=T, return.rtp=F, control=NULL, strand.shuffle=F, ...) {
+  names(chrl) <- unlist(chrl);
+  
+  # determine fraction of total tags contained in the specified nucleosomes
+  ntags <- sum(unlist(lapply(tvl,function(cv)  length(cv))));
+  nctags <- sum(unlist(lapply(chrl, function(cn) length(tvl[[cn]]))));
+  # calculate actual target E value
+  if(!is.null(control)) {
+    n.randomizations <- length(control);
+  }
+  eval <- e.value*n.randomizations*nctags/ntags
+  if(eval<1) {
+    warning("specified e.value and set of chromosomes results in target e.value of less than 1");
+    eval <- 1;
+  }
+  
+  if(debug) {
+    cat(paste("randomizations =",n.randomizations," chromosomes =",length(chrl),"\n"))
+    cat(paste("adjusted target eval =",eval,"\ngenerating randomized tag peaks ..."));
+  }
+
+  # get peaks on randomized tags
+  if(is.null(control)) {
+    rtp <- data.frame(do.call(rbind,lapply(1:n.randomizations,function(i) {
+      if(strand.shuffle) {
+        # shuffle just strand assignment, not tag positions
+        rt <- lapply(tvl[unlist(chrl)],function(tv) tv*sample(c(-1,1),length(tv),replace=T));
+      } else {
+        rt <- lapply(tvl[unlist(chrl)],function(tv) {
+          # split the strands with a logical mask: tv[-which(tv>0)] selects nothing
+          # (instead of everything) when there are no positive-strand tags
+          pos <- !is.na(tv) & tv>0;
+          ptags <- tag.block.shuffle(tv[pos],window.size=shuffle.window);
+          ntags <- tv[!pos];
+          if(shuffle.both.strands) {
+            ntags <- tag.block.shuffle(ntags,window.size=shuffle.window);
+          }
+          return(c(ptags,ntags));
+        });
+      }
+      if(debug) {
+        cat(".");
+      }
+      rl <- window.call.mirror.binding(rt,min.thr=min.thr, debug=F, ...);
+      
+      return(do.call(rbind,rl))
+      #return(do.call(rbind,window.call.mirror.binding(rt,min.thr=min.thr, debug=F, whs=100,isize=10,window.size=3e7,min.dist=200)))
+    })));
+
+  } else {
+    if(debug) {
+      cat(" using provided controls ");
+    }
+    rtp <- data.frame(do.call(rbind,lapply(control,function(rt) do.call(rbind,window.call.mirror.binding(rt,min.thr=min.thr, debug=F, ...)))))
+  }
+
+  if(return.rtp) {
+    return(rtp)
+  }
+
+  if(debug) {
+    cat(" done\nfinding threshold .");
+  }
+
+  # determine range and starting value
+  rng <- c(min.thr,max(na.omit(rtp$y)))
+  
+    # find E value threshold
+  # (target count minus the number of randomized peaks at or above nthr)
+  count.nucs.f <- function(nthr) {
+    return(eval-length(which(rtp$y>=nthr)));
+  }
+  
+  # estimate position of the root by downward bisection iterations
+  # (mvp holds the examined thresholds, newest first, and mv the matching function values)
+  mv <- c(eval); mvp <- c(rng[2]); ni <- 1;
+  max.it <- 2*as.integer(log2(rng[2]/rng[1])+0.5);
+  while((ni<=max.it) & (mv[1]>=0)) {
+    np <- mvp[1]/2;
+    npv <- count.nucs.f(np);
+    mv <- c(npv,mv);
+    mvp <- c(np,mvp);
+    ni <- ni+1;
+  }
+  
+  
+  if(ni>max.it) {
+    # determine lowest value
+    if(debug) {
+      cat(paste("exceeded max.it (",max.it,"), returning lowest point",signif(mvp[1],4)));
+    }
+    return(list(root=mvp[1]))
+  } else {
+    # the two most recent points bracket the sign change
+    rng <- mvp[1:2];
+    if(mv[2]==0) rng[2] <- mvp[3];
+    if(debug) {
+      cat(paste("bound to (",signif(rng[1],4),signif(rng[2],4),") "));
+    }
+  }
+  
+  # find root on the right side
+  x <- uniroot(count.nucs.f,rng,tol=tol);
+  #x$max <- o$par;
+  #x$f.max <- (-1)*o$value;
+  if(debug) {
+    cat(paste(" done (thr=",signif(x$root,4),")\n"));
+  }
+  return(x);
+
+}
+
+
+# Determine which fragments (intervals fs..fe) the points of x fall into.
+#
+# x      - point coordinates (coerced to integer before the native call)
+# fs, fe - fragment start / end coordinates
+# return.list, return.unique, return.point.counts - handed to the native
+#          "points_within" routine as 0/1 integer flags
+# sorted - TRUE if x is already sorted; otherwise x is sorted here and the
+#          result is put back into the original order of x
+#          (unless return.point.counts=T)
+#
+# Returns c() for NULL or empty x, otherwise the value produced by the native routine.
+points.within <- function(x,fs,fe,return.list=F,return.unique=F,sorted=F,return.point.counts=F) {
+  # length(NULL) is 0, so this covers both NULL and empty input
+  if(length(x) < 1) { return(c()) }
+  if(!sorted) {
+    # remember where each point ends up after sorting
+    restore.order <- rank(x,ties.method="first");
+    x <- sort(x);
+  }
+
+  # all fragment boundaries as one coordinate vector, flagged with +i for the
+  # start and -i for the end of fragment i, then ordered by coordinate
+  boundaries <- c(fs,fe);
+  frag.index <- seq(1:length(fs));
+  boundary.flags <- c(frag.index,-1*frag.index)[order(boundaries)];
+  boundaries <- sort(boundaries);
+
+  storage.mode(x) <- "integer";
+  storage.mode(boundary.flags) <- "integer";
+  storage.mode(boundaries) <- "integer";
+  as.flag <- function(v) { if(v) { 1L } else { 0L } }
+  result <- .Call("points_within",x,boundaries,boundary.flags,as.flag(return.list),as.flag(return.unique),as.flag(return.point.counts));
+  if(!(sorted | return.point.counts)) {
+    result <- result[restore.order];
+  }
+  return(result);
+}
+
+
+# determine coordinates of points x relative to signed
+# positions pos within size range
+#
+# x      - point coordinates
+# pos    - signed reference positions
+# size   - range argument handed to the native routine
+# sorted - TRUE if x is already sorted and pos is ordered by abs(pos)
+#
+# Returns: with sorted=T only the $i component of the native result;
+# with sorted=F the whole native result, with $i translated back to
+# indices into the pos vector as it was passed in.
+get.relative.coordinates <- function(x,pos,size,sorted=F) {
+  if(!sorted) {
+    pos.order <- order(abs(pos));
+    pos <- pos[pos.order];
+    x <- sort(x);
+  }
+  storage.mode(x) <- "integer";
+  storage.mode(pos) <- "integer";
+  storage.mode(size) <- "integer";
+  rel <- .Call("get_relative_coordinates",x,pos,size);
+  if(sorted) {
+    return(rel$i);
+  }
+  # map indices of the reordered pos back to the caller's ordering
+  rel$i <- pos.order[rel$i];
+  return(rel);
+}
+
+# given magnitude values for signal (x) and control (y),
+# return a data frame with columns $evalue and $fdr (one row per element of x)
+#
+# evalue - number of control values strictly above x[i], plus 1
+# fdr    - (control count above + 0.5)/(signal count above + 0.5), with the
+#          minimal FDR propagated to every x at or above the smallest x
+#          that attains it
+get.eval.fdr.vectors <- function(x,y) {
+  n.signal <- length(x); n.control <- length(y);
+  if(n.signal==0) { return(data.frame(evalue=c(),fdr=c())) }
+  if(n.control==0) { return(data.frame(evalue=rep(0,n.signal),fdr=rep(1,n.signal))) }
+  signal.cdf <- ecdf(x); control.cdf <- ecdf(y);
+
+  # number of control / signal values strictly greater than each x
+  control.above <- (1-control.cdf(x))*n.control;
+  signal.above <- (1-signal.cdf(x))*n.signal;
+  # ratio with pseudo-counts
+  fdr <- (control.above+0.5)/(signal.above+0.5);
+  # the top signal value has nothing above it: give it the best FDR seen
+  fdr[signal.above==0] <- min(fdr);
+  # everything at or above the smallest x reaching the minimal FDR gets that FDR
+  best.fdr <- min(fdr);
+  best.x <- min(x[fdr==best.fdr]);
+  fdr[x>=best.x] <- best.fdr;
+  return(data.frame(evalue=(control.above+1),fdr=fdr));
+}
+
+
+# filter predictions by their position relative to tag enrichment clusters
+# bd      - per-chromosome list of prediction data frames (position in $x)
+# tec     - per-chromosome list of cluster data frames with $s/$e columns
+# exclude - if TRUE keep calls falling outside of all clusters,
+#           otherwise keep calls falling inside a cluster
+# returns a per-chromosome list of filtered data frames (NULL for missing/empty entries)
+filter.binding.sites <- function(bd,tec,exclude=F) {
+  chrl <- names(bd); names(chrl) <- chrl;
+  lapply(chrl,function(chr) {
+    calls <- bd[[chr]];
+    if(is.null(calls)) { return(NULL) }
+    if(length(calls)==0) { return(NULL) }
+    # nothing to filter on a chromosome without calls
+    if(!(dim(calls)[1]>0)) { return(calls) }
+
+    clusters <- tec[[chr]];
+    if(length(clusters$s)>0) {
+      membership <- points.within(calls$x,clusters$s,clusters$e);
+      # -1 marks a point that is not inside any cluster
+      if(exclude) {
+        keep <- which(membership== -1);
+      } else {
+        keep <- which(membership> -1);
+      }
+      return(calls[keep,]);
+    }
+
+    # no clusters on this chromosome: everything is "outside"
+    if(exclude) {
+      return(calls);
+    }
+    return(data.frame(x=c(),y=c()));
+  });
+}
+
+
+# PUBLIC
+# generate predictions on sequential (chained) subsamples of data
+# if step.size <1, it is interpreted as a fraction, and each subsequent subsample
+# is of a size (1-step.size)*N (N - size of the previous subsample);
+# otherwise the step.size is interpreted as a number of tags, and each subsample is of the size N-step.size
+#
+# signal.data/control.data - per-chromosome lists of tag vectors (control.data may be NULL)
+# n.steps           - number of subsampling steps; if NULL it is derived from step.size
+# subsample.control - if TRUE the control is first resampled to the signal size
+#                     and then subsampled along with the signal
+# min.ntags         - chaining stops once the signal drops below this many tags
+# excluded.steps    - step indices (0-based) for which no predictions are made
+# test.chromosomes  - optional subset of chromosomes to restrict the analysis to
+# ...               - passed to find.binding.positions
+#
+# returns a list of find.binding.positions results named by step index
+get.subsample.chain.calls <- function(signal.data,control.data,n.steps=NULL,step.size=1e6,subsample.control=F,debug=F,min.ntags=1e3, excluded.steps=c(), test.chromosomes=NULL, ... ) {
+  # total number of tags over all chromosomes
+  count.tags <- function(tl) { sum(unlist(lapply(tl,length))) }
+  # draw `size` tags from d. Indexing through sample.int() is what sample()
+  # does for longer vectors, but it avoids sample()'s special case of a single
+  # positive number x being treated as the range 1:x
+  draw.tags <- function(d,size,replace=FALSE) { d[sample.int(length(d),size,replace=replace)] }
+
+  if(!is.null(test.chromosomes)) {
+    # adjust step size
+    full.size <- count.tags(signal.data);
+    signal.data <- signal.data[test.chromosomes];
+    control.data <- control.data[test.chromosomes];
+
+    if(step.size>1) {
+      step.size <- step.size*count.tags(signal.data)/full.size;
+    }
+  }
+
+  if(is.null(n.steps)) {
+    if(step.size<1) {
+      # each step keeps a (1-step.size) fraction of the tags, so reaching 10%
+      # of the data takes log(0.1)/log(1-step.size) steps
+      if(step.size<=0) {
+        n.steps <- 0;
+      } else {
+        n.steps <- floor(log(0.1)/log(1-step.size));
+      }
+    } else {
+      n.steps <- floor(count.tags(signal.data)/step.size)
+    }
+  }
+
+  use.control <- subsample.control && !is.null(control.data);
+  if(use.control) {
+    # normalize control to the signal size
+    if(debug) { cat("pre-subsampling control.\n"); }
+    bg.weight <- count.tags(signal.data)/count.tags(control.data)
+    control.data <- lapply(control.data,function(d) draw.tags(d,length(d)*bg.weight,replace=(bg.weight>1)))
+  }
+
+  calls <- list();
+  callnames <- c();
+  for(i in 0:n.steps) {
+    if(debug) { cat("chained subsample step",i,":\n"); }
+    if(!(i %in% excluded.steps)) {
+      ans <- list(find.binding.positions(signal.data=signal.data,control.data=control.data,debug=debug, skip.control.normalization=T, ...));
+      names(ans) <- as.character(c(i));
+      calls <- c(calls,ans);
+      callnames <- c(callnames,i);
+    }
+    # subsample
+    if(step.size<1) {
+      # fraction steps
+      f <- 1-step.size;
+    } else {
+      # bin steps
+      sz <- count.tags(signal.data);
+      f <- (sz-step.size)/sz;
+      if(f<=0) break;
+    }
+    if(debug) { cat("chained subsampling using fraction",f,".\n"); }
+    signal.data <- lapply(signal.data,function(d) draw.tags(d,length(d)*f));
+    if(use.control) {
+      control.data <- lapply(control.data,function(d) draw.tags(d,length(d)*f));
+    }
+    if(count.tags(signal.data)<min.ntags) break;
+  }
+  names(calls) <- callnames;
+  return(calls);
+}
+
+
+# chain-subsample dataset and calculate MSER interpolation
+#
+# signal.data/control.data - tag data handed to get.subsample.chain.calls when chains is NULL
+# chains        - optional pre-computed list of subsample chains; if NULL, n.chains chains are generated
+# enrichment.background.scales - background scales to report; for a scale >1 the
+#                 enrichment column used is paste(enr.field,scale,sep=".")
+# test.agreement - required agreement; 1-test.agreement is passed as thr to t.find.min.saturated.enr
+# agreement.distance - passed to t.precalculate.ref.peak.agreement
+# return.median/mean.trim - summarize per-size values by median, or by trimmed mean (trim=mean.trim)
+# return.lists  - if TRUE, return the per-chain data frames (n, me, nd) without summarizing
+# ...           - passed to get.subsample.chain.calls
+#
+# returns a list (one element per background scale) of data frames with
+# columns $n and $me, ordered by decreasing $n
+mser.chain.interpolation <- function(signal.data=NULL,control.data=NULL,chains=NULL,n.chains=5,debug=F, enrichment.background.scales=c(1,5), test.agreement=0.99, agreement.distance=50, return.median=F, mean.trim=0.1, enr.field="enr", return.lists=F, ...) {
+  if(is.null(chains)) {
+    cn <- c(1:n.chains); names(cn) <- cn;
+    # the chain index i is ignored; each call produces an independent chain
+    tf <- function(i, ...) get.subsample.chain.calls(signal.data,control.data,debug=debug, enrichment.background.scales=enrichment.background.scales, ...);
+    chains <- lapply(cn,tf,...);
+  } 
+  names(enrichment.background.scales) <- enrichment.background.scales;
+  lapply(enrichment.background.scales,function(scale) {
+    actual.enr.field <- enr.field;
+    if(scale>1) {
+      actual.enr.field <- paste(actual.enr.field,scale,sep=".");
+    }
+      
+    cvl <- lapply(chains,function(chain) {
+      # $n of each chain element, largest first; nd holds the differences between consecutive values
+      nn <- sort(unlist(lapply(chain,function(d) d$n)),decreasing=T);
+      nd <- diff(nn);
+      nn <- nn[-length(nn)];
+      # compare each chain element (i) against its predecessor (i-1)
+      me <- lapply(c(2:length(chain)),function(i) {
+        # chain[i] is a one-element list: the second argument is iterated over as a list of samples
+        sla <- t.precalculate.ref.peak.agreement(chain[[i-1]],chain[i],agreement.distance=agreement.distance,enr.field=actual.enr.field)
+        me <- t.find.min.saturated.enr(sla,thr=1-test.agreement)
+        # largest of: minimal enrichment value in either set of calls, and 1
+        menr <- max(min(na.omit(unlist(lapply(chain[[i-1]]$npl,function(d) d[actual.enr.field])))),min(na.omit(unlist(lapply(chain[[i]]$npl,function(d) d[actual.enr.field])))),1)
+        # a value at or below that floor is reported as 1
+        if(me<=menr) { me <- 1; };
+        return(me);
+      })
+      data.frame(n=nn,me=unlist(me),nd=nd);
+    });
+    if(return.lists) { return(cvl) }
+    # pool all chains and summarize per value of n
+    cvl <- na.omit(do.call(rbind,cvl));
+    if(return.median) {
+      tv <- tapply(cvl$me,as.factor(cvl$n),median)
+    } else {
+      tv <- tapply(cvl$me,as.factor(cvl$n),mean,trim=mean.trim);
+    }
+    df <- data.frame(n=as.numeric(names(tv)),me=as.numeric(tv));
+    return(df[order(df$n,decreasing=T),])
+  })
+}
+
+
+
+# returns agreement as a function of dataset size, possibly filtering peaks by min.enr threshold, and by max.fdr
+#
+# chains   - list of subsample chains; the first element of every chain is used as the reference
+# min.enr  - optional minimal enrichment: reference peaks below it are dropped, and an
+#            overlapping peak below it (or with a missing value) counts as no overlap
+# max.fdr  - optional maximal $fdr; calls above it are removed from every chain element first
+# agreement.distance, enr.field - passed to t.precalculate.ref.peak.agreement
+# return.median/mean.trim - summarize per-size values by median, or by trimmed mean (trim=mean.trim)
+# debug    - not referenced in the function body
+#
+# returns a data frame with columns $n and $me (mean overlap), ordered by decreasing $n
+chain.to.reference.comparison <- function(chains,min.enr=NULL,debug=F,agreement.distance=50, return.median=F, mean.trim=0.1, enr.field="enr",max.fdr=NULL) {
+  cvl <- lapply(chains,function(chain) {
+    # filter chain by fdr
+    if(!is.null(max.fdr)) {
+      chain <- lapply(chain,function(d) { d$npl <- lapply(d$npl,function(cd) cd[cd$fdr<=max.fdr,]); return(d); });
+    }
+    # $n of each chain element, largest first, without the last (smallest) one
+    nn <- sort(unlist(lapply(chain,function(d) d$n)),decreasing=T);
+    nn <- nn[-length(nn)];
+    me <- lapply(c(2:length(chain)),function(i) {
+      # chain[i] is a one-element list, so sla (and x below) has a single element
+      sla <- t.precalculate.ref.peak.agreement(chain[[1]],chain[i],agreement.distance=agreement.distance,enr.field=enr.field)
+      # calculate overlap
+      # (the value of this assignment is what the enclosing function returns)
+      x <- lapply(sla,function(mpd) {
+        if(!is.null(min.enr)) {
+
+          # keep reference peaks with enrichment >= min.enr (missing values are dropped)
+          me <- mpd$re >= min.enr;
+          me[is.na(me)] <- F;
+          mpd <- mpd[me,];
+          # overlapping peaks below min.enr, or without a value, do not count as overlap
+          ome <- mpd$oe < min.enr;
+          ome[is.na(ome)] <- T;
+          mpd$ov[ome] <- 0;
+        }
+        return(mean(mpd$ov));
+      })
+    })
+    
+    data.frame(n=nn,me=unlist(me));
+  });
+
+  # pool all chains and summarize per value of n
+  cvl <- na.omit(do.call(rbind,cvl));
+  if(return.median) {
+    tv <- tapply(cvl$me,as.factor(cvl$n),median)
+  } else {
+    tv <- tapply(cvl$me,as.factor(cvl$n),mean,trim=mean.trim);
+  }
+  df <- data.frame(n=as.numeric(names(tv)),me=as.numeric(tv));
+  return(df[order(df$n,decreasing=T),])
+}
+
+
+# estimates enrichment confidence interval based on 2*tag.count.whs window around each position, and a z-score (alpha/2)
+# the enrichment is estimated for every entry of background.scales, each entry
+# being a multiplier applied to the width of the background tag window
+# adds $enr (lower bound), $enr.ub (upper bound) and $enr.mle fields; with a control
+# it also adds $nbgt, and for a scale >1 the field names get a ".<scale>" suffix
+#
+# binding.positions - structure whose $npl is a per-chromosome list of data frames
+#                     with position $x and signal tag count $nt
+# fraction          - multiplier applied to the signal size / background weight
+# z                 - z-score; the tail probability pnorm(z,lower.tail=F) sets the bounds
+# effective.genome.size - used only when control.data is NULL
+# scale.down.control - if TRUE the control is resampled to bg.weight*fraction of its size
+# bg.weight         - signal/control size ratio; computed from tag counts if NULL
+#
+# returns binding.positions with the updated $npl (and, with a control,
+# $enrichment.bg.weight, $enrichment.whs, $enrichment.z)
+calculate.enrichment.estimates <- function(binding.positions,signal.data=NULL,control.data=NULL,fraction=1,tag.count.whs=100,z=2,effective.genome.size=3e9,scale.down.control=F,background.scales=c(1),bg.weight=NULL) {
+  f <- fraction;
+  qv <- pnorm(z,lower.tail=F);
+  cn <- names(binding.positions$npl); names(cn) <- cn;
+
+  if(is.null(control.data)) {
+    # estimate from gamma distribution
+    # expected number of tags in a 2*tag.count.whs window under uniform density
+    fg.lambda <- f*sum(unlist(lapply(signal.data,length)))*2*tag.count.whs/effective.genome.size;
+    binding.positions$npl <- lapply(binding.positions$npl,function(d) {
+      d$enr <- qgamma(qv,d$nt,scale=1)/fg.lambda;
+      d$enr.ub <- qgamma(1-qv,d$nt,scale=1)/fg.lambda;
+      d$enr.mle <- d$nt/fg.lambda;
+      return(d);
+    });      
+  } else {
+    # estimate using beta distribution
+    # (the bounds below are computed through F-distribution quantiles, qf)
+    if(is.null(bg.weight)) {
+      bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length)))
+    }
+    
+    if(scale.down.control) {
+      # sample down control to be the same size as true signal.data (bg.weight*f)
+      control.data <- lapply(control.data,function(d) sample(d,length(d)*bg.weight*f,replace=(f*bg.weight>1)))
+      #bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length)))
+      bg.weight <- 1/f;
+      
+    }
+
+    # record the parameters used
+    binding.positions$enrichment.bg.weight <- bg.weight;
+    binding.positions$enrichment.whs <- tag.count.whs;
+    binding.positions$enrichment.z <- z;
+    
+    binding.positions$npl <- lapply(cn,function(chr) {
+      d <- binding.positions$npl[[chr]];
+      
+      # one data frame of estimates per background scale
+      edf <- lapply(background.scales,function(background.width.multiplier) {
+        sig.mult <- bg.weight*f/background.width.multiplier;
+        # control tag counts in the (widened) window around each position
+        nbg <- points.within(abs(control.data[[chr]]),d$x-tag.count.whs*background.width.multiplier,d$x+tag.count.whs*background.width.multiplier,return.point.counts=T,return.unique=F);
+        
+        nfg <- d$nt;
+      
+        
+        # Poisson ratio Bayesian LB with non-informative prior (Clopper & Pearson 1934)
+        nf <- ((nfg+0.5)/(nbg+0.5))*qf(1-qv,2*(nfg+0.5),2*(nbg+0.5),lower.tail=F)
+        nf <- nf/sig.mult;
+        
+        ub <- ((nfg+0.5)/(nbg+0.5))*qf(qv,2*(nfg+0.5),2*(nbg+0.5),lower.tail=F)
+        ub <- ub/sig.mult;
+        
+        mle <- (nfg+0.5)/(nbg+0.5);
+        mle <- mle/sig.mult;
+        # NOTE(review): points.within returns c() for an empty point vector, so nbg can
+        # be NULL here; the result then has zero rows, which presumably cannot be
+        # cbind-ed to a non-empty d below -- confirm how chromosomes without control tags behave
+        if(is.null(nbg)) { nbg <- numeric(0) }
+        if(is.null(nf)) { nf <- numeric(0) }
+        if(is.null(ub)) { ub <- numeric(0) }
+        if(is.null(mle)) { mle <- numeric(0) }
+        return(data.frame(nbg=nbg,lb=nf,ub=ub,mle=mle))
+      })
+
+      # rename columns (adding the scale suffix for scales >1) and join side by side
+      adf <- do.call(cbind,lapply(c(1:length(background.scales)),function(i) {
+        df <- edf[[i]];
+        cn <- c("nbgt","enr","enr.ub","enr.mle");
+        if(background.scales[i]>1) {
+          cn <- paste(cn,as.character(background.scales[i]),sep=".");
+        }
+        names(df) <- cn;
+        return(df);
+      }))
+
+      return(cbind(d,adf));
+    });
+  }
+  
+  return(binding.positions);
+}
+
+
+# precalculate peak agreement of a sampling list given a reference
+#
+# ref - reference call set; its $npl is a per-chromosome list of data frames with $x
+# sf  - list of sampled call sets (each with $npl), compared to the reference one by one
+# agreement.distance - a reference peak agrees if it lies within this distance of a sampled peak
+# enr.field - name of the enrichment column to report
+#
+# returns a list (one element per entry of sf) of data frames with one row per
+# reference peak: $ov (1 if matched, 0 otherwise), $re (reference enrichment),
+# $oe (enrichment of the matched sampled peak, NA if none)
+t.precalculate.ref.peak.agreement <- function(ref,sf,agreement.distance=50,enr.field="enr") {
+  ref <- ref$npl;
+  cn <- names(ref); names(cn) <- cn;
+
+  # for each sampling round
+  lapply(sf,function(sd) {
+    # calculate overlap
+      
+    ov <- data.frame(do.call(rbind,lapply(cn,function(chr) {
+      if(dim(ref[[chr]])[1]<1) { return(cbind(ov=c(),re=c(),oe=c())) };
+      # index of the sampled-peak window containing each reference peak (-1 if none)
+      pwi <- points.within(ref[[chr]]$x,sd$npl[[chr]]$x-agreement.distance,sd$npl[[chr]]$x+agreement.distance);
+      pwi[pwi==-1] <- NA;
+      renr <- ref[[chr]][,enr.field]
+      oenr <- sd$npl[[chr]][,enr.field][pwi];
+      # no sampled values available on this chromosome: report NA for every reference peak
+      if(length(oenr)==0) { oenr <- rep(NA,length(renr)); }
+      return(cbind(ov=as.integer(!is.na(pwi)),re=renr,oe=oenr));
+    })))
+  })
+}
+
+
+# find minimal saturated enrichment given a list of replicate agreement matrices (for one fraction)
+#
+# pal - list of agreement data frames with $ov, $re, $oe columns
+#       (as produced by t.precalculate.ref.peak.agreement)
+# thr - allowed disagreement: the result is the lowest enrichment at which the
+#       cumulative overlap fraction is still >= 1-thr
+# plot/plot.individual - draw the overlap curve(s) and thresholds
+# return.number.of.peaks - return the number of reference peaks at or above the
+#       threshold, divided by the number of replicates, instead of the threshold
+# return.vector/return.median - return the per-replicate thresholds, or their median
+#
+# NOTE(review): if no point reaches 1-thr, max(which(...)) is taken over an empty
+# set -- confirm callers never hit that case
+t.find.min.saturated.enr <- function(pal,thr=0.01,plot=F,return.number.of.peaks=F,plot.individual=T,return.median=F,return.vector=F) {
+  nr <- length(pal);
+  # merge replicate data frames
+  mpd <- data.frame(do.call(rbind,pal));
+
+  # missing enrichment values are treated as infinitely high
+  mpd$re[is.na(mpd$re)] <- Inf;
+  mpd$oe[is.na(mpd$oe)] <- Inf;
+
+  
+
+  # round up values to avoid miscounting
+  mpd$re <- round(mpd$re,digits=2);
+  mpd$oe <- round(mpd$oe,digits=2);
+
+  # lower of the reference / matched enrichment, sorted from high to low
+  me <- pmin(mpd$re,mpd$oe);
+  ome <- order(me,decreasing=T);
+  df <- data.frame(me=me[ome],ov=mpd$ov[ome]);
+  # ecdf of negated values: recdf(-v)*ren = number of reference peaks with enrichment >= v
+  recdf <- ecdf(-mpd$re); ren <- length(mpd$re);
+
+  # collapse equal peak heights
+  xk <- tapply(df$ov,as.factor(df$me),sum); xk <- data.frame(ov=as.numeric(xk),me=as.numeric(names(xk))); xk <- xk[order(xk$me,decreasing=T),];
+
+  
+  # cumulative fraction of overlapping peaks among reference peaks at or above each level
+  cso <- cumsum(xk$ov)/(recdf(-xk$me)*ren);
+  cso[is.na(cso)] <- 0;
+  cso[!is.finite(cso)] <- 0;
+  # last (lowest-enrichment) level that still satisfies the agreement threshold
+  mv <- max(which(cso >= 1-thr))
+  menr <- xk$me[mv];
+
+  # same search for each replicate separately (no rounding or collapsing of ties)
+  ir <- lapply(pal,function(d) {
+    d$re[is.na(d$re)] <- Inf;
+    d$oe[is.na(d$oe)] <- Inf;
+        
+    me <- pmin(d$re,d$oe);
+    ome <- order(me,decreasing=T);
+    df <- data.frame(me=me[ome],ov=d$ov[ome]);
+    cso <- cumsum(df$ov)/c(1:length(df$ov));
+    mv <- max(which(cso >= 1-thr))
+    menr <- df$me[mv];
+    return(list(df=df,menr=menr));
+  });
+
+  if(plot) {
+    par(mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8);
+    plot(df$me,cumsum(df$ov)/c(1:length(df$ov)),type='l',ylab="fraction of positions overlapping with reference",xlab="minimal enrichment of binding positions",xlim=c(min(df$me),2*menr));
+    abline(h=1-thr,lty=2,col=4)
+    if(plot.individual) {
+      lapply(ir,function(d) {
+        df <- d$df;
+        lines(df$me,cumsum(df$ov)/c(1:length(df$ov)),col=8);
+        # NOTE(review): menr here is the combined-sample value from the enclosing
+        # function, not d$menr, although the legend calls these lines "individual MSERs" -- confirm
+        abline(v=menr,col="pink",lty=3)
+      });
+      # redraw the combined curve on top of the individual ones
+      lines(df$me,cumsum(df$ov)/c(1:length(df$ov)),col=1);
+    }
+    abline(v=menr,col=2,lty=2)
+    legend(x="bottomright",lty=c(1,2,1,3,2),col=c(1,2,8,"pink",4),legend=c("combined samples","combined sample MSER","individual samples","individual MSERs","consistency threshold"));
+  }
+
+  if(return.number.of.peaks) {
+    # re-merge to get the original (un-rounded, NA-preserving) reference values
+    mpd <- data.frame(do.call(rbind,pal));
+    return(length(which(!is.na(mpd$re) & mpd$re >=menr))/nr);
+  } else {
+    if(return.vector) {
+      return(unlist(lapply(ir,function(d) d$menr)));
+    }
+    if(return.median) {
+      return(median(unlist(lapply(ir,function(d) d$menr))));
+    } else {
+      return(menr);
+    }
+  }
+}
+
+
+
+# determine d1/d2 dataset size ratio. If background.density.scaling=F, the ratio of tag counts is returned.
+# if background.density.scaling=T, regions of significant tag enrichment are masked prior to ratio calculation.
+#
+# d1, d2 - per-chromosome lists of (signed) tag vectors
+# min.tag.count.z, wsize, mcs - passed to tag.enrichment.clusters; clusters found in
+#          either dataset are extended by wsize/2 on each side and masked in both
+dataset.density.ratio <- function(d1,d2,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T) {
+  if(!background.density.scaling) {
+    n1 <- sum(unlist(lapply(d1,length)));
+    n2 <- sum(unlist(lapply(d2,length)));
+    return(n1/n2)
+  }
+
+  # enrichment clusters of a single tag vector
+  find.clusters <- function(tags) {
+    tag.enrichment.clusters(abs(tags),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F)
+  }
+
+  shared.chrs <- intersect(names(d1),names(d2));
+  bg.counts <- do.call(rbind,lapply(shared.chrs,function(chr) {
+    cl1 <- find.clusters(d1[[chr]]);
+    cl2 <- find.clusters(d2[[chr]]);
+    mask.s <- c(cl1$s,cl2$s)-wsize/2;
+    mask.e <- c(cl1$e,cl2$e)+wsize/2;
+    # number of tags outside of every masked region
+    count.outside <- function(tags) { length(which(points.within(abs(tags),mask.s,mask.e)==-1)) }
+    return(c(count.outside(d1[[chr]]),count.outside(d2[[chr]])))
+  }))
+  totals <- apply(bg.counts,2,sum);
+  return(totals[1]/totals[2])
+}
+
+# returns effective size of the dataset based on the same logic as dataset.density.ratio:
+# with background.density.scaling=F the total tag count, otherwise the number of
+# tags falling outside of enrichment clusters (extended by wsize/2 on each side)
+dataset.density.size <- function(d1,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T) {
+  if(!background.density.scaling) {
+    return(sum(unlist(lapply(d1,length))))
+  }
+
+  bg.counts <- lapply(names(d1),function(chr) {
+    tag.pos <- abs(d1[[chr]]);
+    clusters <- tag.enrichment.clusters(tag.pos,c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F)
+    # -1 marks tags that are outside of every masked region
+    outside <- points.within(tag.pos,clusters$s-wsize/2,clusters$e+wsize/2)==-1;
+    return(length(which(outside)))
+  })
+  return(sum(unlist(bg.counts)))
+}
+
+# older variant of dataset.density.ratio: compares background tag densities
+# (tags outside of enrichment clusters per unmasked base) of d1 and d2.
+# With background.density.scaling=F the plain ratio of tag counts is returned.
+old.dataset.density.ratio <- function(d1,d2,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T) {
+  if(!background.density.scaling) {
+    n1 <- sum(unlist(lapply(d1,length)));
+    n2 <- sum(unlist(lapply(d2,length)));
+    return(n1/n2)
+  }
+
+  # per chromosome: n - tags outside of masked regions, s - span of tag
+  # positions, m - total length of the masked regions
+  t.chromosome.counts <- function(tl) {
+    lapply(tl,function(d) {
+      tag.pos <- abs(d);
+      mask <- tag.enrichment.clusters(tag.pos,c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F)
+      mask$s <- mask$s-wsize/2; mask$e <- mask$e+wsize/2;
+      mask <- regionset.intersection.c(list(mask),do.union=T)
+      n.outside <- length(which(points.within(tag.pos,mask$s,mask$e)==-1));
+      return(c(n=n.outside,s=diff(range(tag.pos)),m=sum(mask$e-mask$s)));
+    })
+  }
+
+  counts1 <- t.chromosome.counts(d1);
+  counts2 <- t.chromosome.counts(d2);
+
+  # align the second dataset to the chromosomes of the first
+  counts2 <- data.frame(do.call(rbind,counts2[names(counts1)]));
+  counts1 <- data.frame(do.call(rbind,counts1));
+
+  # genome size
+  genome.size <- sum(pmax(counts1$s,counts2$s))
+
+  density1 <- sum(counts1$n)/(genome.size-sum(counts1$m))
+  density2 <- sum(counts2$n)/(genome.size-sum(counts2$m))
+  return(density1/density2);
+}
+
+
+
+
+# calculate cumulative density based on sum of scaled gaussian curves
+# (by Michael Tolstorukov)
+#
+# vin - input vector; bw -- standard deviation, dw - gaussian cutoff in stdev
+# from/to/step - range and spacing of the positions at which density is reported
+# output - if return.x=F vector of cumulative density values corresponding to integer positions described by range(vin)
+# output - if return.x=T a data structure with $x (first and last position), $y (density) and $step
+# optional match.wt.f is a function that will return weights for a tag vector
+densum <- function(vin,bw=5,dw=3,match.wt.f=NULL,return.x=T,from=min(vin),to=max(vin),step=1)    {
+  # construct vector of unique tags and their counts
+  tc <- table(vin[vin>=from & vin<=to]);
+  pos <- as.numeric(names(tc)); storage.mode(pos) <- "double";
+  tc <- as.numeric(tc); storage.mode(tc) <- "double";
+  n <- length(pos)
+  # weight counts
+  if(!is.null(match.wt.f)) {
+    tc <- tc*match.wt.f(pos);
+  }
+
+  rng <- c(from,to);
+  if(rng[1]<0) { stop("range extends into negative values") }
+  # (guarded by n>0 so that an empty selection does not trigger a min() warning)
+  if(n>0 && min(pos)<0) { stop("position vector contains negative values") }
+
+  storage.mode(n) <- storage.mode(rng) <- storage.mode(bw) <- storage.mode(dw) <- storage.mode(step) <- "integer";
+
+  spos <- rng[1]; storage.mode(spos) <- "double";
+
+  dlength <- floor((rng[2] - rng[1])/step) + 1; # length of output array
+  if(dlength<1) { stop("zero data range") }
+  dout <- numeric(dlength); storage.mode(dout) <- "double";
+  storage.mode(dlength) <- "integer";
+  # take the output vector from the value returned by .C(): in-place modification
+  # of dout through DUP=F is not available in R versions that ignore that argument,
+  # where the local dout would stay all zeros
+  dout <- .C("cdensum",n,pos,tc,spos,bw,dw,dlength,step,dout=dout)$dout;
+
+  if(return.x) {
+    return(list(x=c(rng[1],rng[1]+step*(dlength-1)),y=dout,step=step))
+  } else {
+    return(dout)
+  }
+}
+
+# count tags within sliding window of a specified size
+# vin - tag vector (positive values, pre-shifted)
+# window.size/window.step - window characteristics
+# from/to - first window position and upper limit of the scanned range
+# tv - optional, pre-sorted, pre-trimmed tag vector
+# returns the vector of counts (return.x=F), or a list with $x (first and last
+# window position), $y (counts) and $step
+window.tag.count <- function(vin,window.size,window.step=1,return.x=T,from=min(vin)+floor(window.size/2),to=max(vin)-floor(window.size/2),tv=NULL) {
+  whs <- floor(window.size/2);
+  if(is.null(tv)) {
+    # select tags with margins
+    lower <- from-whs-1; upper <- to+whs+1;
+    tv <- sort(vin[vin>=lower & vin<=upper])
+  }
+  storage.mode(tv) <- "double";
+
+  nsteps <- ceiling((to-from)/window.step);
+  storage.mode(nsteps) <- "integer";
+  storage.mode(window.size) <- "integer";
+  storage.mode(window.step) <- "integer";
+
+  spos <- from; storage.mode(spos) <- "double";
+
+  if(nsteps<1) { stop("zero data range") }
+  counts <- .Call("cwindow_n_tags",tv,spos,window.size,window.step,nsteps);
+
+  if(!return.x) {
+    return(counts)
+  }
+  return(list(x=c(from,from+(nsteps-1)*window.step),y=counts,step=window.step))
+}
+
+# count tags in windows around specified positions (pos)
+# vin - tag vector (only used to tabulate tags when tc is not given)
+# window.size - full window size; half of it (rounded down) is passed to the native routine
+# tc - optional pre-computed table of tag positions
+# sorted - TRUE if pos is already sorted; otherwise results are returned in the order of pos
+# returns a data frame with $x (positions) and $y (counts), or just the counts if return.x=F
+window.tag.count.around <- function(vin,window.size,pos,return.x=T,tc=NULL,sorted=F) {
+  if(is.null(tc)) {
+    tc <- table(vin);
+  }
+  if(!sorted) {
+    # rank() averages tied positions; a fractional index is truncated when
+    # subsetting, which still selects an entry of the same tied position
+    original.rank <- rank(pos);
+    pos <- sort(pos);
+  }
+  storage.mode(pos) <- "double";
+  tpos <- as.integer(names(tc)); storage.mode(tpos) <- "double";
+  tc <- as.integer(tc); storage.mode(tc) <- "integer";
+
+  whs <- floor(window.size/2);
+  storage.mode(whs) <- "integer";
+  twc <- .Call("cwindow_n_tags_around",tpos,tc,pos,whs);
+
+  if(!sorted) {
+    # restore the caller's ordering
+    pos <- pos[original.rank];
+    twc <- twc[original.rank];
+  }
+  if(return.x) {
+    return(data.frame(x=pos,y=twc));
+  }
+  return(twc);
+}
+
+# given a tag vector (signed), identify and clean up (either remove or cap) singular positions that exceed local tag density
+# tags - tag vector
+# window.size - size of the window used to estimate local tag density
+# cap.fold - maximal fold over enrichment over local density allowed for a single tag position, at which the tag count is capped
+# eliminate.fold - max fold enrichment that, when exceeded, results in exclusion of all the tags at that position (e.g. counted as anomaly)
+# z.threshold - Z-score used to determine max allowed counts
+# returns the cleaned tag vector, ordered by absolute position
+# (vectors with fewer than two tags are returned unchanged)
+filter.singular.positions.by.local.density <- function(tags,window.size=200,cap.fold=4,eliminate.fold=10,z.threshold=3) {
+  # tabulate tag positions
+  if(length(tags)<2) { return(tags); };
+  
+  tc <- table(tags);
+  pos <- as.numeric(names(tc)); storage.mode(pos) <- "double";
+  tc <- as.integer(tc); storage.mode(tc) <- "integer";
+  n <- length(pos); 
+
+  whs <- floor(window.size/2);
+  
+  storage.mode(n) <- storage.mode(whs) <- "integer";
+  # tag counts in the window around every unique tag position
+  twc <- .Call("cwindow_n_tags_around",pos,tc,pos,whs);
+  # exclude the position's own tags (keeping one) before converting to a per-base density
+  twc <- (twc-tc+1)/window.size; # local density
+
+  # upper-tail probability corresponding to the z-score
+  pv <- pnorm(z.threshold,lower.tail=F)
+  # exclude
+  max.counts <- qpois(pv,twc*eliminate.fold,lower.tail=F)
+  tc[tc>max.counts] <- 0;
+  # cap
+  max.counts <- qpois(pv,twc*cap.fold,lower.tail=F)
+  ivi <- which(tc>max.counts);
+  tc[ivi] <- max.counts[ivi]+1;
+
+  # reconstruct tag vector
+  tv <- rep(pos,tc);
+  to <- order(abs(tv)); tv <- tv[to];
+  return(tv);
+}
+
+
+
+# calculates enrichment bounds using multiple background scales
+# ft - foreground tags (pre-shifted, positive)
+# bt - background tags
+# fws - foreground window size
+# bwsl - background window size list
+# step - window step
+# rng - from/to coordinates (to will be adjusted according to step)
+# alpha - two-sided significance level of the bounds
+# bg.weight - foreground/background dataset size ratio
+# use.most.informative.scale - pick a single background scale per position
+#   (the one with the highest width-normalized count for the lower bound and MLE,
+#   the lowest for the upper bound) instead of combining all scales
+# quick.calculation - use the closed-form approximation instead of qf()
+# pos - optional positions; if given, counts are taken around these positions
+#   rather than over a sliding window
+#
+# returns: a list with $x ($s $e $step), $lb vector and $mle vector ($ub if calculate.upper.bound=T)
+mbs.enrichment.bounds <- function(ft,bt,fws,bwsl,step=1,rng=NULL,alpha=0.05,calculate.upper.bound=F,bg.weight=length(ft)/length(bt),use.most.informative.scale=F,quick.calculation=F,pos=NULL) {
+  # determine range
+  if(is.null(rng)) {
+    rng <- range(range(ft));
+  }
+  # foreground counts
+  if(is.null(pos)) {
+    fwc <- window.tag.count(ft,fws,window.step=step,from=rng[1],to=rng[2],return.x=T);
+  } else {
+    fwc <- window.tag.count.around(ft,fws,pos,return.x=T)
+  }
+  fwc$y <- fwc$y+0.5;
+
+  zal <- qnorm(alpha/2,lower.tail=F);
+
+  # background counts
+  bt <- sort(bt);
+  if(!is.null(pos)) {
+    tc <- table(bt);
+  }
+  bgcm <- lapply(bwsl,function(bgws) {
+    if(is.null(pos)) {
+      window.tag.count(bt,bgws,window.step=step,from=rng[1],to=rng[2],return.x=F,tv=bt)+0.5;
+    } else {
+      window.tag.count.around(bt,bgws,pos,return.x=F,tc=tc)+0.5
+    }
+  })
+  if(!is.null(pos)) {
+    rm(tc);
+  }
+
+  # closed-form bound for background counts nbg taken at window size(s) bws;
+  # sgn=-1 gives the lower bound, sgn=+1 the upper bound.
+  # Positions where the approximation is not applicable are set to 1.
+  quick.bound <- function(nbg,bws,sgn) {
+    rte <- fwc$y+nbg-0.25*zal*zal; rte[rte<0] <- 0;
+    dn <- nbg - 0.25*zal*zal;
+    b <- (sqrt(fwc$y*nbg) + sgn*0.5*zal*sqrt(rte))/dn;
+    ivi <- which(b<0);
+    b <- b*b*bws/fws/bg.weight;
+    b[rte<=0] <- 1;
+    b[dn<=0] <- 1;
+    b[ivi] <- 1;
+    return(b);
+  }
+  # F-distribution based bound; p is the upper-tail probability passed to qf()
+  exact.bound <- function(nbg,bws,p) {
+    (fwc$y/nbg)*qf(p,2*fwc$y,2*nbg,lower.tail=F)*bws/fws/bg.weight
+  }
+  lower.bound <- function(nbg,bws) {
+    if(quick.calculation) { quick.bound(nbg,bws,-1) } else { exact.bound(nbg,bws,1-alpha/2) }
+  }
+  upper.bound <- function(nbg,bws) {
+    if(quick.calculation) { quick.bound(nbg,bws,1) } else { exact.bound(nbg,bws,alpha/2) }
+  }
+  # maximum-likelihood enrichment; undefined ratios are reported as Inf
+  scaled.mle <- function(nbg,bws) {
+    x <- fwc$y/nbg*bws/fws/bg.weight; x[is.nan(x)] <- Inf; x[is.na(x)] <- Inf;
+    return(x);
+  }
+
+  # pick most informative scale
+  if(use.most.informative.scale) {
+    # rows - background scales, columns - positions
+    bgcm <- t(do.call(cbind,bgcm))
+    # background count of the scale with index isi[j] at position j
+    pick.scale <- function(isi) { c(bgcm)[isi+dim(bgcm)[1]*(c(1:length(isi))-1)] }
+
+    # scale with the highest width-normalized background count
+    # NOTE(review): max.col() breaks ties at random by default, so ties are not
+    # resolved towards a particular scale -- confirm whether that is intended
+    isi <- max.col(t((bgcm)/(bwsl/fws)))
+    bgc <- pick.scale(isi);
+
+    lbm <- lower.bound(bgc,bwsl[isi]);
+    mle <- scaled.mle(bgc,bwsl[isi]);
+
+    rl <- list(x=list(s=fwc$x[1],e=fwc$x[2],step=fwc$step),lb=lbm,mle=mle);
+
+    if(calculate.upper.bound) {
+      # scale with the lowest width-normalized background count
+      isi <- max.col(t((-bgcm)/(bwsl/fws)))
+      bgc <- pick.scale(isi);
+      # the bound (including its validity guards) is computed from the
+      # background counts of the scale selected here
+      ubm <- upper.bound(bgc,bwsl[isi]);
+      rl <- c(rl,list(ub=ubm));
+    }
+    return(rl);
+
+  } else {
+    scale.index <- c(1:length(bgcm));
+
+    # determine lower bounds: the most conservative (lowest) over all scales
+    lbm <- do.call(pmin,lapply(scale.index,function(i) lower.bound(bgcm[[i]],bwsl[i])));
+
+    # calculate mle
+    mle <- do.call(pmin,lapply(scale.index,function(i) scaled.mle(bgcm[[i]],bwsl[i])));
+
+    rl <- list(x=list(s=fwc$x[1],e=fwc$x[2],step=fwc$step),lb=lbm,mle=mle);
+
+    if(calculate.upper.bound) {
+      # determine upper bound: the highest over all scales
+      ubm <- do.call(pmax,lapply(scale.index,function(i) upper.bound(bgcm[[i]],bwsl[i])));
+      rl <- c(rl,list(ub=ubm));
+    }
+
+    return(rl);
+  }
+}
+
+# write probe values as a wiggle track made of "chr start end value" segments
+# chr, pos, val - chromosome, start position and value of every probe
+# fname         - output file
+# append        - append to fname instead of overwriting it
+# feature       - text placed in the description field of the track line
+# probe.length  - segment length; shortened to (smallest distance between
+#                 consecutive positions)-1 when segments would otherwise overlap
+# header        - write the track definition line before the data
+write.probe.wig <- function(chr,pos,val,fname,append=F,feature="M",probe.length=35,header=T) {
+  closest <- min(diff(pos));
+  if(probe.length>=closest) {
+    probe.length <- closest-1;
+    cat("warning: adjusted down wig segment length to",probe.length,"\n");
+  }
+  segments <- data.frame(chr,as.integer(pos),as.integer(pos+probe.length),val)
+
+  append.rows <- append;
+  if(header) {
+    track.line <- paste("track type=wiggle_0 name=\"Bed Format\" description=\"",feature,"\" visibility=dense color=200,100,0 altColor=0,100,200 priority=20",sep="");
+    write(track.line,file=fname,append=append)
+    # the data always follows the header that was just written
+    append.rows <- T;
+  }
+  write.table(segments,file=fname,col.names=F,row.names=F,quote=F,sep=" ",append=append.rows);
+}
+
+# returns intersection of multiple region sets
+# each regionset needs to contain $s, $e and optional $v column
+#
+# rsl      - list of region sets
+# max.val  - passed to the native routine (as integer); forced to 0 when no $v values are present
+# do.union - passed to the native routine as a 0/1 flag; callers in this file
+#            set it to request a union rather than an intersection
+#
+# returns a data frame built by column-binding the components returned by
+# the native "region_intersection" routine
+regionset.intersection.c <- function(rsl,max.val=-1,do.union=F) {
+  # translate into position/flag form
+  # (flag +1 marks a region start, -1 a region end)
+  rfl <- lapply(rsl,function(rs) {
+    rp <- c(rs$s,rs$e); rf <- c(rep(c(1,-1),each=length(rs$s)));
+    
+    ro <- order(rp);
+    rp <- rp[ro]; rf <- rf[ro];
+    if(!is.null(rs$v)) {
+      # the region value is attached to both of its boundaries
+      rv <- c(rs$v,rs$v)[ro];
+      return(data.frame(p=as.numeric(rp),f=as.integer(rf),v=as.numeric(rv)));
+    } else {
+      return(data.frame(p=as.numeric(rp),f=as.integer(rf)));
+    }
+  })
+  # merge all sets, scaling the flag by the index of the set it came from
+  rfd <- data.frame(do.call(rbind,lapply(1:length(rfl),function(i) {
+    d <- rfl[[i]]; d$f <- d$f*i; return(d);
+  })))
+  rfd <- rfd[order(rfd$p),];
+  if(is.null(rfd$v)) { max.val <- 0; }
+  if(do.union) { ur <- 1; } else { ur <- 0; }; 
+  rl <- .Call("region_intersection",as.integer(length(rfl)),as.numeric(rfd$p),as.integer(rfd$f),as.numeric(rfd$v),as.integer(max.val),as.integer(ur));
+  return(data.frame(do.call(cbind,rl)));
+}
+
+
+# identify if binding peak falls within a larger region of significant tag enrichment, and if so record its boundaries
+#
+# chip.tags/input.tags - signal and control tag data passed to find.significantly.enriched.regions
+# bp          - binding position structure with a per-chromosome $npl list of data frames (position in $x)
+# window.size, z.thr - passed to find.significantly.enriched.regions
+#
+# returns bp with $rs/$re columns (start/end of the enclosing enriched region(s),
+# NA where a peak is outside of any region) added to every non-empty $npl data frame
+add.broad.peak.regions <- function(chip.tags,input.tags,bp,window.size=500,z.thr=2) {
+  se <- find.significantly.enriched.regions(chip.tags,input.tags,window.size=window.size,z.thr=z.thr,poisson.z=0,poisson.ratio=0,either=F)
+  chrl <- names(bp$npl); names(chrl) <- chrl;
+  bp$npl <- lapply(chrl,function(chr) {
+    npl <- bp$npl[[chr]];
+    # || short-circuits, so a NULL entry is returned as is instead of
+    # producing a zero-length condition
+    if(is.null(npl) || dim(npl)[1]<1) {
+      return(npl);
+    }
+    # for every peak, the indices of the enriched regions that contain it
+    region.hits <- points.within(npl$x,se[[chr]]$s,se[[chr]]$e,return.list=T);
+
+    bounds <- do.call(rbind,lapply(region.hits,function(rl) {
+      if(length(rl)>0) {
+        return(range(c(se[[chr]]$s[rl],se[[chr]]$e[rl])))
+      } else {
+        return(c(NA,NA));
+      }
+    }))
+
+    npl$rs <- bounds[,1];
+    npl$re <- bounds[,2];
+    return(npl);
+  })
+  return(bp);
+}
+
+# writing out binding results in a narrowpeak format, incorporating broad region boundaries if they are present
+# if broad region info is not present, margin is used to determine region width. The default margin is equal
+# to the window half size used to call the binding peaks
+#
+# bd     - binding results with a per-chromosome $npl list of data frames ($x, $y, $fdr, optional $rs/$re)
+# fname  - output file (overwritten)
+# margin - half-width of the region written for peaks without broad region boundaries (50 if NULL)
+# npeaks - if not NA, only the npeaks highest-scoring ($y) peaks are written
+write.narrowpeak.binding <- function(bd,fname,margin=bd$whs,npeaks=NA) { # Anshul: added npeaks option
+  if(is.null(margin)) { margin <- 50; }
+  chrl <- names(bd$npl); names(chrl) <- chrl;
+  md <- do.call(rbind,lapply(chrl,function(chr) {
+    df <- bd$npl[[chr]];
+    x <- df$x;
+    # a chromosome without peaks contributes no rows (cbind of the constant
+    # columns with empty vectors would otherwise produce a malformed row)
+    if(length(x)<1) { return(NULL) }
+    rs <- df$rs; if(is.null(rs)) { rs <- rep(NA,length(x)) }
+    re <- df$re; if(is.null(re)) { re <- rep(NA,length(x)) }
+    ivi <- is.na(rs); rs[ivi] <- pmax(0,x[ivi]-margin); # Anshul: added the pmax (0, ...) to avoid negative peak starts
+    ivi <- is.na(re); re[ivi] <- x[ivi]+margin;
+    cbind(chr,rs,re,".","0",".",df$y,-1,-log10(df$fdr),x-rs) # Anshul: converted fdr to -log10
+  }))
+  # no peaks at all: write an empty file
+  if(is.null(md)) { md <- matrix(character(0),nrow=0,ncol=10) }
+  # drop=FALSE keeps a single peak as a one-row matrix, so that it is written
+  # as one line rather than one field per line
+  md <- md[order(as.numeric(md[,7]),decreasing=T),,drop=FALSE]
+  if (!is.na(npeaks)) { # Anshul: added this option to print a limited number of peaks
+    npeaks <- max(0,floor(min(nrow(md),npeaks)))
+    md <- md[seq_len(npeaks),,drop=FALSE]
+  }
+  write.table(md,file=fname,col.names=F,row.names=F,quote=F,sep="\t",append=F);
+}
+
+
+# find broad clusters of significant tag enrichment
+#
+# signal.data/control.data - per-chromosome lists of signed tag vectors
+# window.size, z.thr, tag.shift, ... - passed to find.significantly.enriched.regions
+# background.density.scaling - passed to dataset.density.ratio when estimating
+#                              the signal/control weight
+#
+# returns (invisibly) a per-chromosome list of merged regions ($s, $e) with $rv, the
+# log2 ratio of (signal+1)/(control+1) tag counts in the region, scaled by the dataset weight
+get.broad.enrichment.clusters <- function(signal.data,control.data,window.size=1e3,z.thr=3, tag.shift=146/2,background.density.scaling=F, ... ) {
+  # find significantly enriched clusters
+  bg.weight <- dataset.density.ratio(signal.data,control.data,background.density.scaling=background.density.scaling);
+  se <- find.significantly.enriched.regions(signal.data,control.data,window.size=window.size,z.thr=z.thr,tag.shift=tag.shift, bg.weight=bg.weight, ...)
+  chrl <- names(se); names(chrl) <- chrl;
+  se <- lapply(chrl,function(chr) {
+    d <- se[[chr]];
+    # chromosomes without regions are passed through untouched; every chromosome
+    # with at least one region gets merged regions and an $rv column
+    if(length(d$s)<1) {
+      return(d)
+    }
+    # merge overlapping regions
+    d <- regionset.intersection.c(list(d,d),do.union=T);
+    # shifted tag counts within each region
+    sc <- points.within(abs(signal.data[[chr]]+tag.shift),d$s,d$e,return.point.counts=T);
+    cc <- points.within(abs(control.data[[chr]]+tag.shift),d$s,d$e,return.point.counts=T);
+    d$rv <- log2((sc+1)/(cc+1)/bg.weight);
+    return(d);
+  })
+  return(invisible(se));
+}
+
+# write broad enrichment regions in a broadpeak-style tab-separated format
+# bp    - per-chromosome list of region data frames with $s, $e and $rv
+# fname - output file (overwritten); rows are ordered by decreasing $rv
+write.broadpeak.info <- function(bp,fname) {
+  chrl <- names(bp); names(chrl) <- chrl;
+  # only chromosomes that have regions
+  chrl <- chrl[unlist(lapply(bp,function(d) length(d$s)))>0]
+  md <- do.call(rbind,lapply(chrl,function(chr) {
+    df <- bp[[chr]];
+    cbind(chr,df$s,df$e,".","0",".",df$rv,-1,-1)
+  }))
+  # no regions at all: write an empty file
+  if(is.null(md)) { md <- matrix(character(0),nrow=0,ncol=9) }
+  # drop=FALSE keeps a single region as a one-row matrix, so that it is
+  # written as one line rather than one field per line
+  md <- md[order(as.numeric(md[,7]),decreasing=T),,drop=FALSE]
+  write.table(md,file=fname,col.names=F,row.names=F,quote=F,sep="\t",append=F);
+}
+
+
+# find runs of identical, non-zero values in x that are at least CL elements long
+# returns a list with the $size, $begin and $end (indices into x) of every such run
+get.clusters2 <- function(x,CL)  {
+  # a new run starts after every position where the value changes
+  change.at <- which(diff(x) != 0)
+  run.begin <- c(1, change.at + 1)
+  run.end <- c(change.at, length(x))
+  run.size <- run.end - run.begin + 1
+
+  # keep runs that are long enough
+  long.enough <- run.size >= CL
+  run.begin <- run.begin[long.enough]
+  run.end <- run.end[long.enough]
+  run.size <- run.size[long.enough]
+
+  # ... and whose value is not zero
+  non.zero <- x[run.end] != 0
+  return (list(size=run.size[non.zero],begin=run.begin[non.zero],end=run.end[non.zero]))
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/configure	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,3856 @@
+#! /bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.63 for SPP 1.7.
+#
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## --------------------- ##
+## M4sh Initialization.  ##
+## --------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in
+  *posix*) set -o posix ;;
+esac
+
+fi
+
+
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  as_unset=unset
+else
+  as_unset=false
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+case $0 in
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  { (exit 1); exit 1; }
+fi
+
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# CDPATH.
+$as_unset CDPATH
+
+
+if test "x$CONFIG_SHELL" = x; then
+  if (eval ":") 2>/dev/null; then
+  as_have_required=yes
+else
+  as_have_required=no
+fi
+
+  if test $as_have_required = yes &&	 (eval ":
+(as_func_return () {
+  (exit \$1)
+}
+as_func_success () {
+  as_func_return 0
+}
+as_func_failure () {
+  as_func_return 1
+}
+as_func_ret_success () {
+  return 0
+}
+as_func_ret_failure () {
+  return 1
+}
+
+exitcode=0
+if as_func_success; then
+  :
+else
+  exitcode=1
+  echo as_func_success failed.
+fi
+
+if as_func_failure; then
+  exitcode=1
+  echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+  :
+else
+  exitcode=1
+  echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+  exitcode=1
+  echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+  :
+else
+  exitcode=1
+  echo positional parameters were not saved.
+fi
+
+test \$exitcode = 0) || { (exit 1); exit 1; }
+
+(
+  as_lineno_1=\$LINENO
+  as_lineno_2=\$LINENO
+  test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" &&
+  test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; }
+") 2> /dev/null; then
+  :
+else
+  as_candidate_shells=
+    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  case $as_dir in
+	 /*)
+	   for as_base in sh bash ksh sh5; do
+	     as_candidate_shells="$as_candidate_shells $as_dir/$as_base"
+	   done;;
+       esac
+done
+IFS=$as_save_IFS
+
+
+      for as_shell in $as_candidate_shells $SHELL; do
+	 # Try only shells that exist, to save several forks.
+	 if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+		{ ("$as_shell") 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in
+  *posix*) set -o posix ;;
+esac
+
+fi
+
+
+:
+_ASEOF
+}; then
+  CONFIG_SHELL=$as_shell
+	       as_have_required=yes
+	       if { "$as_shell" 2> /dev/null <<\_ASEOF
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in
+  *posix*) set -o posix ;;
+esac
+
+fi
+
+
+:
+(as_func_return () {
+  (exit $1)
+}
+as_func_success () {
+  as_func_return 0
+}
+as_func_failure () {
+  as_func_return 1
+}
+as_func_ret_success () {
+  return 0
+}
+as_func_ret_failure () {
+  return 1
+}
+
+exitcode=0
+if as_func_success; then
+  :
+else
+  exitcode=1
+  echo as_func_success failed.
+fi
+
+if as_func_failure; then
+  exitcode=1
+  echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+  :
+else
+  exitcode=1
+  echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+  exitcode=1
+  echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = "$1" ); then
+  :
+else
+  exitcode=1
+  echo positional parameters were not saved.
+fi
+
+test $exitcode = 0) || { (exit 1); exit 1; }
+
+(
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; }
+
+_ASEOF
+}; then
+  break
+fi
+
+fi
+
+      done
+
+      if test "x$CONFIG_SHELL" != x; then
+  for as_var in BASH_ENV ENV
+	do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+	done
+	export CONFIG_SHELL
+	exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+fi
+
+
+    if test $as_have_required = no; then
+  echo This script requires a shell more modern than all the
+      echo shells that I found on your system.  Please install a
+      echo modern shell, or manually run the script under such a
+      echo shell if you do have one.
+      { (exit 1); exit 1; }
+fi
+
+
+fi
+
+fi
+
+
+
+(eval "as_func_return () {
+  (exit \$1)
+}
+as_func_success () {
+  as_func_return 0
+}
+as_func_failure () {
+  as_func_return 1
+}
+as_func_ret_success () {
+  return 0
+}
+as_func_ret_failure () {
+  return 1
+}
+
+exitcode=0
+if as_func_success; then
+  :
+else
+  exitcode=1
+  echo as_func_success failed.
+fi
+
+if as_func_failure; then
+  exitcode=1
+  echo as_func_failure succeeded.
+fi
+
+if as_func_ret_success; then
+  :
+else
+  exitcode=1
+  echo as_func_ret_success failed.
+fi
+
+if as_func_ret_failure; then
+  exitcode=1
+  echo as_func_ret_failure succeeded.
+fi
+
+if ( set x; as_func_ret_success y && test x = \"\$1\" ); then
+  :
+else
+  exitcode=1
+  echo positional parameters were not saved.
+fi
+
+test \$exitcode = 0") || {
+  echo No shell found that supports shell functions.
+  echo Please tell bug-autoconf@gnu.org about your system,
+  echo including any error possibly output before this message.
+  echo This can help us improve future autoconf versions.
+  echo Configuration will now proceed without shell functions.
+}
+
+
+
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+  # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+  # uniformly replaced by the line number.  The first 'sed' inserts a
+  # line-number line after each line using $LINENO; the second 'sed'
+  # does the real work.  The second script uses 'N' to pair each
+  # line-number line with the line containing $LINENO, and appends
+  # trailing '-' during substitution so that $LINENO is not a special
+  # case at line end.
+  # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+  # scripts with optimization help from Paolo Bonzini.  Blame Lee
+  # E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+   { (exit 1); exit 1; }; }
+
+  # Don't try to exec as it changes $[0], causing all sort of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on.  Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in
+-n*)
+  case `echo 'x\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  *)   ECHO_C='\c';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -p'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -p'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -p'
+  fi
+else
+  as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p=:
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+if test -x / >/dev/null 2>&1; then
+  as_test_x='test -x'
+else
+  if ls -dL / >/dev/null 2>&1; then
+    as_ls_L_option=L
+  else
+    as_ls_L_option=
+  fi
+  as_test_x='
+    eval sh -c '\''
+      if test -d "$1"; then
+	test -d "$1/.";
+      else
+	case $1 in
+	-*)set "./$1";;
+	esac;
+	case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
+	???[sx]*):;;*)false;;esac;fi
+    '\'' sh
+  '
+fi
+as_executable_p=$as_test_x
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+
+exec 7<&0 </dev/null 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+# Identity of this package.
+PACKAGE_NAME='SPP'
+PACKAGE_TARNAME='spp'
+PACKAGE_VERSION='1.7'
+PACKAGE_STRING='SPP 1.7'
+PACKAGE_BUGREPORT=''
+
+ac_subst_vars='LTLIBOBJS
+LIBOBJS
+HAVE_LIBBZ2
+OBJEXT
+EXEEXT
+ac_ct_CC
+CPPFLAGS
+LDFLAGS
+CFLAGS
+CC
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+'
+      ac_precious_vars='build_alias
+host_alias
+target_alias
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  case $ac_option in
+  *=*)	ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *)	ac_optarg=yes ;;
+  esac
+
+  # Accept the important Cygnus configure options, so we can diagnose typos.
+
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2
+   { (exit 1); exit 1; }; }
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      { $as_echo "$as_me: error: invalid feature name: $ac_useropt" >&2
+   { (exit 1); exit 1; }; }
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2
+   { (exit 1); exit 1; }; }
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      { $as_echo "$as_me: error: invalid package name: $ac_useropt" >&2
+   { (exit 1); exit 1; }; }
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+	 ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) { $as_echo "$as_me: error: unrecognized option: $ac_option
+Try \`$0 --help' for more information." >&2
+   { (exit 1); exit 1; }; }
+    ;;
+
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
+      { $as_echo "$as_me: error: invalid variable name: $ac_envvar" >&2
+   { (exit 1); exit 1; }; }
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;
+
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+    ;;
+
+  esac
+done
+
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  { $as_echo "$as_me: error: missing argument to $ac_option" >&2
+   { (exit 1); exit 1; }; }
+fi
+
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;
+    fatal) { $as_echo "$as_me: error: unrecognized options: $ac_unrecognized_opts" >&2
+   { (exit 1); exit 1; }; } ;;
+    *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
+		datadir sysconfdir sharedstatedir localstatedir includedir \
+		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+		libdir localedir mandir
+do
+  eval ac_val=\$$ac_var
+  # Remove trailing slashes.
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+      eval $ac_var=\$ac_val;;
+  esac
+  # Be sure to have absolute directory names.
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* )  continue;;
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+  esac
+  { $as_echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+   { (exit 1); exit 1; }; }
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+    $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+    If a cross compiler is detected then cross compile mode will be used." >&2
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  { $as_echo "$as_me: error: working directory cannot be determined" >&2
+   { (exit 1); exit 1; }; }
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  { $as_echo "$as_me: error: pwd does not report name of working directory" >&2
+   { (exit 1); exit 1; }; }
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_myself" : 'X\(//\)[^/]' \| \
+	 X"$as_myself" : 'X\(//\)$' \| \
+	 X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+  { $as_echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+   { (exit 1); exit 1; }; }
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+	cd "$srcdir" && test -r "./$ac_unique_file" || { $as_echo "$as_me: error: $ac_msg" >&2
+   { (exit 1); exit 1; }; }
+	pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures SPP 1.7 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+                          [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR            user executables [EPREFIX/bin]
+  --sbindir=DIR           system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR        program executables [EPREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --libdir=DIR            object code libraries [EPREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/spp]
+  --htmldir=DIR           html documentation [DOCDIR]
+  --dvidir=DIR            dvi documentation [DOCDIR]
+  --pdfdir=DIR            pdf documentation [DOCDIR]
+  --psdir=DIR             ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of SPP 1.7:";;
+   esac
+  cat <<\_ACEOF
+
+Some influential environment variables:
+  CC          C compiler command
+  CFLAGS      C compiler flags
+  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
+              nonstandard directory <lib dir>
+  LIBS        libraries to pass to the linker, e.g. -l<library>
+  CPPFLAGS    C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
+              you have headers in a nonstandard directory <include dir>
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+_ACEOF
+ac_status=$?
+fi
+
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+      continue
+    ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }
+    # Check for a nested configure script (configure.gnu takes precedence).
+    if test -f "$ac_srcdir/configure.gnu"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi || ac_status=$?
+    cd "$ac_pwd" || { ac_status=$?; break; }
+  done
+fi
+
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+  cat <<\_ACEOF
+SPP configure 1.7
+generated by GNU Autoconf 2.63
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit
+fi
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by SPP $as_me 1.7, which was
+generated by GNU Autoconf 2.63.  Invocation command line was
+
+  $ $0 $@
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  $as_echo "PATH: $as_dir"
+done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *\'*)
+      ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+    2)
+      ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+	ac_must_keep_next=false # Got value, back to normal.
+      else
+	case $ac_arg in
+	  *=* | --config-cache | -C | -disable-* | --disable-* \
+	  | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+	  | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+	  | -with-* | --with-* | -without-* | --without-* | --x)
+	    case "$ac_configure_args0 " in
+	      "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+	    esac
+	    ;;
+	  -* ) ac_must_keep_next=true ;;
+	esac
+      fi
+      ac_configure_args="$ac_configure_args '$ac_arg'"
+      ;;
+    esac
+  done
+done
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, clean up temporary files, and complete
+# config.log.  We remove comments because the quotes in them would
+# cause problems or look ugly anyway.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+(
+  for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) $as_unset $ac_var ;;
+      esac ;;
+    esac
+  done
+  (set) 2>&1 |
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      sed -n \
+	"s/'\''/'\''\\\\'\'''\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
+    *)
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+)
+    echo
+
+    cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      $as_echo "$ac_var='\''$ac_val'\''"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      cat <<\_ASBOX
+## ------------------- ##
+## File substitutions. ##
+## ------------------- ##
+_ASBOX
+      echo
+      for ac_var in $ac_subst_files
+      do
+	eval ac_val=\$$ac_var
+	case $ac_val in
+	*\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+	esac
+	$as_echo "$ac_var='\''$ac_val'\''"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+      echo
+      cat confdefs.h
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      $as_echo "$as_me: caught signal $ac_signal"
+    $as_echo "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h
+
+# Predefined preprocessor variables.
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+ac_site_file1=NONE
+ac_site_file2=NONE
+if test -n "$CONFIG_SITE"; then
+  ac_site_file1=$CONFIG_SITE
+elif test "x$prefix" != xNONE; then
+  ac_site_file1=$prefix/share/config.site
+  ac_site_file2=$prefix/etc/config.site
+else
+  ac_site_file1=$ac_default_prefix/share/config.site
+  ac_site_file2=$ac_default_prefix/etc/config.site
+fi
+for ac_site_file in "$ac_site_file1" "$ac_site_file2"
+do
+  test "x$ac_site_file" = xNONE && continue
+  if test -r "$ac_site_file"; then
+    { $as_echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+$as_echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file"
+  fi
+done
+
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special
+  # files actually), so we avoid doing that.
+  if test -f "$cache_file"; then
+    { $as_echo "$as_me:$LINENO: loading cache $cache_file" >&5
+$as_echo "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                      . "./$cache_file";;
+    esac
+  fi
+else
+  { $as_echo "$as_me:$LINENO: creating cache $cache_file" >&5
+$as_echo "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { $as_echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { $as_echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
+$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+	# differences in whitespace do not lead to failure.
+	ac_old_val_w=`echo x $ac_old_val`
+	ac_new_val_w=`echo x $ac_new_val`
+	if test "$ac_old_val_w" != "$ac_new_val_w"; then
+	  { $as_echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
+$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
+	  ac_cache_corrupted=:
+	else
+	  { $as_echo "$as_me:$LINENO: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5
+$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;}
+	  eval $ac_var=\$ac_old_val
+	fi
+	{ $as_echo "$as_me:$LINENO:   former value:  \`$ac_old_val'" >&5
+$as_echo "$as_me:   former value:  \`$ac_old_val'" >&2;}
+	{ $as_echo "$as_me:$LINENO:   current value: \`$ac_new_val'" >&5
+$as_echo "$as_me:   current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then
+  { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+  { $as_echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
+$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
+  { { $as_echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
+$as_echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:$LINENO: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:$LINENO: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue
+     fi
+    ac_cv_prog_CC="cc"
+    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found the rejected /usr/ucb/cc in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:$LINENO: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { $as_echo "$as_me:$LINENO: result: $CC" >&5
+$as_echo "$CC" >&6; }
+else
+  { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+    test -n "$CC" && break
+  done
+fi
+if test -z "$CC"; then
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ $as_echo "$as_me:$LINENO: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for ac_exec_ext in '' $ac_executable_extensions; do
+  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    $as_echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { $as_echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+$as_echo "$ac_ct_CC" >&6; }
+else
+  { $as_echo "$as_me:$LINENO: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:$LINENO: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+
+
+test -z "$CC" && { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }; }
+
+# Provide some information about the compiler.
+$as_echo "$as_me:$LINENO: checking for C compiler version" >&5
+set X $ac_compile
+ac_compiler=$2
+{ (ac_try="$ac_compiler --version >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compiler --version >&5") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compiler -v >&5") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compiler -V >&5") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }
+
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and gives us a first guess at
+# exeext.
+{ $as_echo "$as_me:$LINENO: checking for C compiler default output file name" >&5
+$as_echo_n "checking for C compiler default output file name... " >&6; }
+ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+  esac
+done
+rm -f $ac_rmfiles
+
+if { (ac_try="$ac_link_default"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link_default") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; then
+  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile.  We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj )
+	;;
+    [ab].out )
+	# We found the default executable, but exeext='' is most
+	# certainly right.
+	break;;
+    *.* )
+        if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+	then :; else
+	   ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	fi
+	# We set ac_cv_exeext here because the later test for it is not
+	# safe: cross compilers may not add the suffix if given an `-o'
+	# argument, so we may need to know it at that point already.
+	# Even if this section looks crufty: it has the advantage of
+	# actually working.
+	break;;
+    * )
+	break;;
+  esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+  ac_file=''
+fi
+
+{ $as_echo "$as_me:$LINENO: result: $ac_file" >&5
+$as_echo "$ac_file" >&6; }
+if test -z "$ac_file"; then
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: C compiler cannot create executables
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: C compiler cannot create executables
+See \`config.log' for more details." >&2;}
+   { (exit 77); exit 77; }; }; }
+fi
+
+ac_exeext=$ac_cv_exeext
+
+# Check that the compiler produces executables we can run.  If not, either
+# the compiler is broken, or we cross compile.
+{ $as_echo "$as_me:$LINENO: checking whether the C compiler works" >&5
+$as_echo_n "checking whether the C compiler works... " >&6; }
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+  if { ac_try='./$ac_file'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+    cross_compiling=no
+  else
+    if test "$cross_compiling" = maybe; then
+	cross_compiling=yes
+    else
+	{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }; }
+    fi
+  fi
+fi
+{ $as_echo "$as_me:$LINENO: result: yes" >&5
+$as_echo "yes" >&6; }
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+# Report the cross-compilation status; the run test above may have changed
+# it from `maybe' to `yes', or confirmed it as `no'.
+{ $as_echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+$as_echo_n "checking whether we are cross compiling... " >&6; }
+{ $as_echo "$as_me:$LINENO: result: $cross_compiling" >&5
+$as_echo "$cross_compiling" >&6; }
+
+{ $as_echo "$as_me:$LINENO: checking for suffix of executables" >&5
+$as_echo_n "checking for suffix of executables... " >&6; }
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; then
+  # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;;
+    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+	  break;;
+    * ) break;;
+  esac
+done
+else
+  { { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }; }
+fi
+
+rm -f conftest$ac_cv_exeext
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
+$as_echo "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+{ $as_echo "$as_me:$LINENO: checking for suffix of object files" >&5
+$as_echo_n "checking for suffix of object files... " >&6; }
+if test "${ac_cv_objext+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; then
+  for ac_file in conftest.o conftest.obj conftest.*; do
+  test -f "$ac_file" || continue;
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;;
+    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+       break;;
+  esac
+done
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { $as_echo "$as_me:$LINENO: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+{ { $as_echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+$as_echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }; }
+fi
+
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
+$as_echo "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ $as_echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
+$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_compiler_gnu=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_compiler_gnu=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
+$as_echo "$ac_cv_c_compiler_gnu" >&6; }
+if test $ac_compiler_gnu = yes; then
+  GCC=yes
+else
+  GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ $as_echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
+$as_echo_n "checking whether $CC accepts -g... " >&6; }
+if test "${ac_cv_prog_cc_g+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"
+   cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_prog_cc_g=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	CFLAGS=""
+      cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  :
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_c_werror_flag=$ac_save_c_werror_flag
+	 CFLAGS="-g"
+	 cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_prog_cc_g=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
+$as_echo "$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+{ $as_echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5
+$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
+if test "${ac_cv_prog_cc_c89+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not '\xHH' hex character constants.
+   These don't provoke an error unfortunately, instead are silently treated
+   as 'x'.  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
+   array size at least.  It's necessary to write '\x00'==0 to get something
+   that's true only with -std.  */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0]  ||  f (e, argv, 1) != argv[1];
+  ;
+  return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+	-Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"
+  rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest.$ac_objext; then
+  ac_cv_prog_cc_c89=$ac_arg
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext
+  test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# End of the AC_CACHE_VAL body: report the (possibly cached) ISO C89 option.
+case "x$ac_cv_prog_cc_c89" in
+  x)
+    { $as_echo "$as_me:$LINENO: result: none needed" >&5
+$as_echo "none needed" >&6; } ;;
+  xno)
+    { $as_echo "$as_me:$LINENO: result: unsupported" >&5
+$as_echo "unsupported" >&6; } ;;
+  *)
+    CC="$CC $ac_cv_prog_cc_c89"
+    { $as_echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
+$as_echo "$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+{ $as_echo "$as_me:$LINENO: checking for BZ2_bzDecompressInit in -lbz2" >&5
+$as_echo_n "checking for BZ2_bzDecompressInit in -lbz2... " >&6; }
+if test "${ac_cv_lib_bz2_BZ2_bzDecompressInit+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lbz2  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char BZ2_bzDecompressInit ();
+int
+main ()
+{
+return BZ2_bzDecompressInit ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+	 test -z "$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+	 test "$cross_compiling" = yes ||
+	 $as_test_x conftest$ac_exeext
+       }; then
+  ac_cv_lib_bz2_BZ2_bzDecompressInit=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_lib_bz2_BZ2_bzDecompressInit=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:$LINENO: result: $ac_cv_lib_bz2_BZ2_bzDecompressInit" >&5
+$as_echo "$ac_cv_lib_bz2_BZ2_bzDecompressInit" >&6; }
+if test "x$ac_cv_lib_bz2_BZ2_bzDecompressInit" = x""yes; then
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBBZ2 1
+_ACEOF
+
+  LIBS="-lbz2 $LIBS"
+
+fi
+
+
+ac_config_files="$ac_config_files src/Makevars"
+
+cp confdefs.h src/config.h
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { $as_echo "$as_me:$LINENO: WARNING: cache variable $ac_var contains a newline" >&5
+$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) $as_unset $ac_var ;;
+      esac ;;
+    esac
+  done
+
+  (set) 2>&1 |
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      # `set' does not quote correctly, so add quotes (double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \).
+      sed -n \
+	"s/'/'\\\\''/g;
+	  s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;; #(
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+) |
+  sed '
+     /^ac_cv_env_/b end
+     t clear
+     :clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
+     t end
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+  if test -w "$cache_file"; then
+    test "x$cache_file" != "x/dev/null" &&
+      { $as_echo "$as_me:$LINENO: updating cache $cache_file" >&5
+$as_echo "$as_me: updating cache $cache_file" >&6;}
+    cat confcache >$cache_file
+  else
+    { $as_echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5
+$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
+  fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# Transform confdefs.h into DEFS.
+# Protect against shell expansion while executing Makefile rules.
+# Protect against Makefile macro expansion.
+#
+# If the first sed substitution is executed (which looks for macros that
+# take arguments), then branch to the quote section.  Otherwise,
+# look for a macro that doesn't take arguments.
+ac_script='
+:mline
+/\\$/{
+ N
+ s,\\\n,,
+ b mline
+}
+t clear
+:clear
+s/^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 (][^	 (]*([^)]*)\)[	 ]*\(.*\)/-D\1=\2/g
+t quote
+s/^[	 ]*#[	 ]*define[	 ][	 ]*\([^	 ][^	 ]*\)[	 ]*\(.*\)/-D\1=\2/g
+t quote
+b any
+:quote
+s/[	 `~#$^&*(){}\\|;'\''"<>?]/\\&/g
+s/\[/\\&/g
+s/\]/\\&/g
+s/\$/$$/g
+H
+:any
+${
+	g
+	s/^\n//
+	s/\n/ /g
+	p
+}
+'
+DEFS=`sed -n "$ac_script" confdefs.h`
+
+
+ac_libobjs=
+ac_ltlibobjs=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+  # 1. Remove the extension, and $U if already installed.
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`$as_echo "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+  ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+
+: ${CONFIG_STATUS=./config.status}
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ $as_echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: creating $CONFIG_STATUS" >&6;}
+cat >$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+SHELL=\${CONFIG_SHELL-$SHELL}
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+## --------------------- ##
+## M4sh Initialization.  ##
+## --------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else
+  case `(set -o) 2>/dev/null` in
+  *posix*) set -o posix ;;
+esac
+
+fi
+
+
+
+
+# PATH needs CR
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+as_nl='
+'
+export as_nl
+# Printing a long string crashes Solaris 7 /usr/bin/printf.
+as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo
+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo
+if (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then
+  as_echo='printf %s\n'
+  as_echo_n='printf %s'
+else
+  if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then
+    as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"'
+    as_echo_n='/usr/ucb/echo -n'
+  else
+    as_echo_body='eval expr "X$1" : "X\\(.*\\)"'
+    as_echo_n_body='eval
+      arg=$1;
+      case $arg in
+      *"$as_nl"*)
+	expr "X$arg" : "X\\(.*\\)$as_nl";
+	arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;;
+      esac;
+      expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl"
+    '
+    export as_echo_n_body
+    as_echo_n='sh -c $as_echo_n_body as_echo'
+  fi
+  export as_echo_body
+  as_echo='sh -c $as_echo_body as_echo'
+fi
+
+# The user is always right.
+if test "${PATH_SEPARATOR+set}" != set; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+# Support unset when possible.
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  as_unset=unset
+else
+  as_unset=false
+fi
+
+
+# IFS
+# We need space, tab and new line, in precisely that order.  Quoting is
+# there to prevent editors from complaining about space-tab.
+# (If _AS_PATH_WALK were called with IFS unset, it would disable word
+# splitting by setting IFS to empty value.)
+IFS=" ""	$as_nl"
+
+# Find who we are.  Look in the path if we contain no directory separator.
+case $0 in
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
+done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2
+  { (exit 1); exit 1; }
+fi
+
+# Work around bugs in pre-3.0 UWIN ksh.
+for as_var in ENV MAIL MAILPATH
+do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var
+done
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# NLS nuisances.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# Required to use basename.
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+
+# Name of the executable.
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+	 X"$0" : 'X\(//\)$' \| \
+	 X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\/\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+
+# CDPATH.
+$as_unset CDPATH
+
+
+
+  as_lineno_1=$LINENO
+  as_lineno_2=$LINENO
+  test "x$as_lineno_1" != "x$as_lineno_2" &&
+  test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || {
+
+  # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
+  # uniformly replaced by the line number.  The first 'sed' inserts a
+  # line-number line after each line using $LINENO; the second 'sed'
+  # does the real work.  The second script uses 'N' to pair each
+  # line-number line with the line containing $LINENO, and appends
+  # trailing '-' during substitution so that $LINENO is not a special
+  # case at line end.
+  # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
+  # scripts with optimization help from Paolo Bonzini.  Blame Lee
+  # E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
+   { (exit 1); exit 1; }; }
+
+  # Don't try to exec as it changes $[0], causing all sort of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on.  Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in
+-n*)
+  case `echo 'x\c'` in
+  *c*) ECHO_T='	';;	# ECHO_T is single tab character.
+  *)   ECHO_C='\c';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -p'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -p'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -p'
+  fi
+else
+  as_ln_s='cp -p'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p=:
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+if test -x / >/dev/null 2>&1; then
+  as_test_x='test -x'
+else
+  if ls -dL / >/dev/null 2>&1; then
+    as_ls_L_option=L
+  else
+    as_ls_L_option=
+  fi
+  as_test_x='
+    eval sh -c '\''
+      if test -d "$1"; then
+	test -d "$1/.";
+      else
+	case $1 in
+	-*)set "./$1";;
+	esac;
+	case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
+	???[sx]*):;;*)false;;esac;fi
+    '\'' sh
+  '
+fi
+as_executable_p=$as_test_x
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+
+# Save the log message, to keep $[0] and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by SPP $as_me 1.7, which was
+generated by GNU Autoconf 2.63.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files from templates according to the
+current configuration.
+
+Usage: $0 [OPTION]... [FILE]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number and configuration settings, then exit
+  -q, --quiet, --silent
+                   do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+      --file=FILE[:TEMPLATE]
+                   instantiate the configuration file FILE
+
+Configuration files:
+$config_files
+
+Report bugs to <bug-autoconf@gnu.org>."
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_version="\\
+SPP config.status 1.7
+configured by $0, generated by GNU Autoconf 2.63,
+  with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
+
+Copyright (C) 2008 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=*)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+    ac_shift=:
+    ;;
+  *)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift
+    ;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    $as_echo "$ac_cs_version"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    CONFIG_FILES="$CONFIG_FILES '$ac_optarg'"
+    ac_need_defaults=false;;
+  --he | --h |  --help | --hel | -h )
+    $as_echo "$ac_cs_usage"; exit ;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) { $as_echo "$as_me: error: unrecognized option: $1
+Try \`$0 --help' for more information." >&2
+   { (exit 1); exit 1; }; } ;;
+
+  *) ac_config_targets="$ac_config_targets $1"
+     ac_need_defaults=false ;;
+
+  esac
+  shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+  set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  shift
+  \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
+  CONFIG_SHELL='$SHELL'
+  export CONFIG_SHELL
+  exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  $as_echo "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+  case $ac_config_target in
+    "src/Makevars") CONFIG_FILES="$CONFIG_FILES src/Makevars" ;;
+
+  *) { { $as_echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
+$as_echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
+   { (exit 1); exit 1; }; };;
+  esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+  tmp=
+  trap 'exit_status=$?
+  { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
+' 0
+  trap '{ (exit 1); exit 1; }' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+  test -n "$tmp" && test -d "$tmp"
+}  ||
+{
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
+} ||
+{
+   $as_echo "$as_me: cannot create a temporary directory in ." >&2
+   { (exit 1); exit 1; }
+}
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr='
'
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+  ac_cs_awk_cr='\\r'
+else
+  ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+  echo "cat >conf$$subs.awk <<_ACEOF" &&
+  echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+  echo "_ACEOF"
+} >conf$$subs.sh ||
+  { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+   { (exit 1); exit 1; }; }
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  . ./conf$$subs.sh ||
+    { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+   { (exit 1); exit 1; }; }
+
+  ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+  if test $ac_delim_n = $ac_delim_num; then
+    break
+  elif $ac_last_try; then
+    { { $as_echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+   { (exit 1); exit 1; }; }
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\).*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\).*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+  N
+  s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$tmp/subs1.awk" <<_ACAWK &&
+  for (key in S) S_is_set[key] = 1
+  FS = ""
+
+}
+{
+  line = $ 0
+  nfields = split(line, field, "@")
+  substed = 0
+  len = length(field[1])
+  for (i = 2; i < nfields; i++) {
+    key = field[i]
+    keylen = length(key)
+    if (S_is_set[key]) {
+      value = S[key]
+      line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+      len += length(value) + length(field[++i])
+      substed = 1
+    } else
+      len += 1 + keylen
+  }
+
+  print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+  sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+  cat
+fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \
+  || { { $as_echo "$as_me:$LINENO: error: could not setup config files machinery" >&5
+$as_echo "$as_me: error: could not setup config files machinery" >&2;}
+   { (exit 1); exit 1; }; }
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove $(srcdir),
+# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=/{
+s/:*\$(srcdir):*/:/
+s/:*\${srcdir}:*/:/
+s/:*@srcdir@:*/:/
+s/^\([^=]*=[	 ]*\):*/\1/
+s/:*$//
+s/^[^=]*=[	 ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+
+eval set X "  :F $CONFIG_FILES      "
+shift
+for ac_tag
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) { { $as_echo "$as_me:$LINENO: error: invalid tag $ac_tag" >&5
+$as_echo "$as_me: error: invalid tag $ac_tag" >&2;}
+   { (exit 1); exit 1; }; };;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+  esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
+
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+	 # (if the path is not absolute).  The absolute path cannot be DOS-style,
+	 # because $ac_f cannot contain `:'.
+	 test -f "$ac_f" ||
+	   case $ac_f in
+	   [\\/$]*) false;;
+	   *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+	   esac ||
+	   { { $as_echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5
+$as_echo "$as_me: error: cannot find input file: $ac_f" >&2;}
+   { (exit 1); exit 1; }; };;
+      esac
+      case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
+      ac_file_inputs="$ac_file_inputs '$ac_f'"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input='Generated from '`
+	  $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+	`' by configure.'
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { $as_echo "$as_me:$LINENO: creating $ac_file" >&5
+$as_echo "$as_me: creating $ac_file" >&6;}
+    fi
+    # Neutralize special characters interpreted by sed in replacement strings.
+    case $configure_input in #(
+    *\&* | *\|* | *\\* )
+       ac_sed_conf_input=`$as_echo "$configure_input" |
+       sed 's/[\\\\&|]/\\\\&/g'`;; #(
+    *) ac_sed_conf_input=$configure_input;;
+    esac
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$tmp/stdin" \
+      || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+   { (exit 1); exit 1; }; } ;;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$ac_file" : 'X\(//\)[^/]' \| \
+	 X"$ac_file" : 'X\(//\)$' \| \
+	 X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+  { as_dir="$ac_dir"
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+	 X"$as_dir" : 'X\(//\)[^/]' \| \
+	 X"$as_dir" : 'X\(//\)$' \| \
+	 X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)[^/].*/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\/\)$/{
+	    s//\1/
+	    q
+	  }
+	  /^X\(\/\).*/{
+	    s//\1/
+	    q
+	  }
+	  s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || { { $as_echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5
+$as_echo "$as_me: error: cannot create directory $as_dir" >&2;}
+   { (exit 1); exit 1; }; }; }
+  ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+
+ac_sed_dataroot='
+/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p
+'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { $as_echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5
+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+    s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \
+  || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+   { (exit 1); exit 1; }; }
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
+  { $as_echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined." >&5
+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined." >&2;}
+
+  rm -f "$tmp/stdin"
+  case $ac_file in
+  -) cat "$tmp/out" && rm -f "$tmp/out";;
+  *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";;
+  esac \
+  || { { $as_echo "$as_me:$LINENO: error: could not create $ac_file" >&5
+$as_echo "$as_me: error: could not create $ac_file" >&2;}
+   { (exit 1); exit 1; }; }
+ ;;
+
+
+
+  esac
+
+done # for ac_tag
+
+
+{ (exit 0); exit 0; }
+_ACEOF
+chmod +x $CONFIG_STATUS
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+  { { $as_echo "$as_me:$LINENO: error: write failure creating $CONFIG_STATUS" >&5
+$as_echo "$as_me: error: write failure creating $CONFIG_STATUS" >&2;}
+   { (exit 1); exit 1; }; }
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || { (exit 1); exit 1; }
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
+  { $as_echo "$as_me:$LINENO: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
+fi
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/configure.ac	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,7 @@
+AC_INIT([SPP], 1.7)  # package name and version, as reported by config.status --version
+
+AC_CHECK_LIB(bz2, BZ2_bzDecompressInit)  # probe whether BZ2_bzDecompressInit links from libbz2
+AC_SUBST(HAVE_LIBBZ2)  # NOTE(review): exports shell variable HAVE_LIBBZ2, which nothing visible here assigns; confirm the value reaching src/Makevars.in
+AC_CONFIG_FILES([src/Makevars])  # instantiate src/Makevars from its src/Makevars.in template
+cp confdefs.h src/config.h  # copy the macro definitions collected so far into src/config.h
+AC_OUTPUT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/add.broad.peak.regions.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,27 @@
+\name{add.broad.peak.regions}
+\alias{add.broad.peak.regions}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Determine broad regions of enrichment associated with binding positions }
+\description{
+  Looks for broader regions of enrichment associated with the determined
+  peak positions, adds them to the $npl data as $rs, $re columns. 
+}
+\usage{
+add.broad.peak.regions(signal.tags, control.tags, binding.postions,window.size=500,z.thr=2)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output
+    of \code{\link{select.informative.tags}}) }
+  \item{control.tags}{ optional control (input) tags }
+  \item{binding.positions}{ output of find.binding.positions call }
+  \item{window.size}{ window size to be used in calculating enrichment }
+  \item{z.thr}{ Z-score corresponding to the Poisson ratio threshold
+    used to flag significantly enriched windows}
+}
+\value{
+  A structure identical to binding.positions with two additional columns
+  added (rs and re) corresponding to start and end of the associated
+  significantly enriched region. If no region was associated with a
+  particular peak, NA values are reported.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/find.binding.positions.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,128 @@
+\name{find.binding.positions}
+\alias{find.binding.positions}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Determine significant point protein binding positions (peaks) }
+\description{
+  Given the signal and optional control (input) data, determine location of the
+  statistically significant point binding positions. If the control data
+  is not provided, the statistical significance can be assessed based on
+  tag randomization. The method also provides options for masking
+  regions exhibiting strong signals within the control data.
+}
+\usage{
+find.binding.positions(signal.data, e.value = NULL, fdr = NULL, masked.data = NULL, control.data = NULL, min.dist = 200, window.size = 4e+07, cluster = NULL, debug = T, n.randomizations = 3, shuffle.window = 1, min.thr = 0, topN = NULL, tag.count.whs = 100, enrichment.z = 2, method = tag.wtd, tec.filter = T, tec.window.size = 10000, tec.masking.window.size=tec.window.size, tec.z = 5, tec.poisson.z=5,tec.poisson.ratio=5, n.control.samples = 1, enrichment.background.scales = c(1, 5, 10), background.density.scaling = F, use.randomized.controls = F, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  ~~ tag data ~~
+  \item{signal.data}{ signal tag vector list }
+  \item{control.data}{ optional control (input) tag vector list }
+
+  ~~ position stringency criteria ~~
+  \item{e.value}{ E-value defining the desired statistical significance
+    of binding positions.  }
+  \item{fdr}{ FDR defining statistical significance of binding positions  }
+  \item{topN}{ instead of determining statistical significance
+    thresholds, return the specified number of highest-scoring
+    positions}
+
+  ~~ other params ~~
+  \item{whs}{ window half-size that should be used for binding
+    detection (e.g. determined from cross-correlation profiles)}
+  \item{masked.data}{ optional set of coordinates that should be masked
+    (e.g. known non-unique regions) }
+  \item{min.dist}{ minimal distance that must separate detected binding
+    positions. In case multiple binding positions are detected within
+    such distance, the position with the highest score is returned. }
+  \item{window.size}{ size of the window used to segment the chromosome
+    during calculations to reduce memory usage. }
+  \item{cluster}{ optional \code{snow} cluster to parallelize the
+    processing on }
+  \item{min.thr}{ minimal score requirement for a peak }
+  \item{background.density.scaling}{ If TRUE, regions of significant tag
+    enrichment will be masked out when calculating size ratio of the
+    signal to control datasets (to estimate ratio of the background tag
+    density). If FALSE, the dataset ratio will be equal to the ratio of
+    the number of tags in each dataset.}
+
+  ~~ randomized controls ~~
+  \item{n.randomizations}{ number of tag randomizations that should be
+    performed (when the control data is not provided) }
+  \item{use.randomized.controls}{ Use randomized tag control, even if
+    \code{control.data} is supplied. }
+  \item{shuffle.window}{ during tag randomizations, tags will be split
+    into groups of \code{shuffle.window} and will be maintained
+    together throughout the randomization. }
+
+  ~~ fold-enrichment confidence intervals ~~
+  \item{tag.count.whs}{ half-size of a window used to assess fold
+    enrichment of a binding position}
+  \item{enrichment.z}{ Z-score used to define the significance level of
+    the fold-enrichment confidence intervals }
+    \item{enrichment.background.scales}{ In estimating the peak
+      fold-enrichment confidence intervals, the background tag density is
+      estimated based on windows with half-sizes of
+  \code{2*tag.count.whs*enrichment.background.scales}. }
+  \item{method}{ either \code{tag.wtd} for WTD method, or
+    \code{tag.lwcc} for MTC method}
+  \item{mle.filter}{ If turned on, will exclude predicted positions
+    whose MLE enrichment ratio (for any of the background scales) is
+    below a specified min.mle.threshold }
+  \item{min.mle.threshold}{ MLE enrichment ratio threshold that each
+    predicted position must exceed if mle.filter is turned on. }
+
+  ~~ masking regions of significant control enrichment ~~
+  \item{tec.filter}{ Whether to mask out the regions exhibiting
+  significant enrichment in the control data in doing other
+  calculations. The regions are identified using Poisson statistics
+  within sliding windows, either relative to the scaled signal (tec.z), or
+  relative to randomly-distributed expectation (tec.poisson.z).}
+  \item{tec.window.size}{ size of the window used to determine
+    significantly enriched control regions }
+  \item{tec.masking.window.size}{ size of the window used to mask 
+    the area around significantly enriched control regions }
+  \item{tec.z}{ Z-score defining statistical stringency by which a given
+    window is determined to be significantly higher in the input than in
+    the signal, and masked if that is the case.}
+  \item{tec.poisson.z}{ Z-score defining statistical stringency by which a given
+    window is determined to be significantly higher than the
+    tec.poisson.ratio above the expected uniform input background. }
+  \item{tec.poisson.ratio}{ Fold ratio by which input must exceed the
+    level expected from the uniform distribution. }
+
+
+  
+  
+}
+\value{
+  \item{npl}{A per-chromosome list containing data frames describing
+    determined binding positions. Column description:
+    \item{x}{ position }
+    \item{y}{ score }
+    \item{evalue}{ E-value }
+    \item{fdr}{ FDR. For peaks higher than the maximum control peak,
+      the highest dataset FDR is reported }
+    \item{enr}{ lower bound of the fold-enrichment ratio confidence
+      interval. This is the estimate determined using scale of
+      1. Estimates corresponding to higher scales are returned in other enr columns
+      with scale appearing in the name.}
+    \item{enr.mle}{ enrichment ratio maximum likelihood estimate }
+  }
+  \item{thr}{ info on the chosen statistical threshold of the peak scores}
+}
+
+\examples{
+  # find binding positions using WTD method, 200bp half-window size,
+  # control data, 1\% FDR
+  bp <-
+find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.wtd,whs=200);
+
+  # find binding positions using MTC method, using 5 tag randomizations,
+  #  keeping pairs of tag positions together (shuffle.window=2)
+  bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.lwcc,whs=200,use.randomized.controls=T,n.randomizations=5,shuffle.window=2)
+
+  # print out the number of determined positions  
+  print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks"));
+  
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/get.binding.characteristics.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,55 @@
+\name{get.binding.characteristics}
+\alias{get.binding.characteristics}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate characteristics of observed DNA-binding signal from
+  cross-correlation profiles }
+\description{
+  The method calculates strand cross-correlation profile to determine binding
+  peak separation distance and approximate window size that should be used
+  for binding detection. If quality scores were given for the tags, the method
+  also determines which quality bins improve the cross-correlation pattern.
+}
+\usage{
+get.binding.characteristics(data, srange = c(50, 500), bin = 5, cluster = NULL, debug = F, min.tag.count = 1000, acceptance.z.score = 3, remove.tag.anomalies = T, anomalies.z = 5,accept.all.tags=F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{data}{ Tag/quality data: output of \code{read.eland.tags} or similar function }
+  \item{srange}{ A range within which the binding peak separation is
+    expected to fall. Should be larger than probe size to avoid artifacts. }
+  \item{bin}{ Resolution (in basepairs) at which cross-correlation
+    should be calculated. bin=1 is ideal, but takes longer to calculate. }
+  \item{cluster}{ optional snow cluster for parallel processing }
+  \item{debug}{ whether to print debug messages }
+  \item{min.tag.count}{ minimal number of tags on the chromosome to be
+    considered in the cross-correlation calculations }
+  \item{acceptance.z.score}{ A Z-score used to determine if a given tag
+    quality bin provides significant improvement to the strand cross-correlation }
+  \item{remove.tag.anomalies}{ Whether to remove singular tag count peaks prior to
+  calculation. This is recommended, since such positions may distort the
+  cross-correlation profile and increase the necessary computational time. }
+  \item{anomalies.z}{ Z-score for determining if the number of tags at a
+  given position is significantly higher than background, and should be
+  considered an anomaly.}
+  \item{accept.all.tags}{ Whether tag alignment quality calculations
+    should be skipped and all available tags should be accepted in the
+    downstream analysis.}
+}
+\value{
+  \item{cross.correlation }{ Cross-correlation profile as an $x/$y data.frame}
+  \item{peak }{Position ($x) and height ($y) of automatically detected
+    cross-correlation peak.}
+  \item{whs} { Optimized window half-size for binding detection (based
+    on the width of the cross-correlation peak) }
+  \item{quality.bin.acceptance} { A list structure, describing the
+    effect of inclusion of different tag quality bins on
+    cross-correlation, and a resolution on which bins should be
+    considered.
+    \item{informative.bins} { A boolean vector indicating whether the
+      inclusion of tags from the tag quality bin specified in the name
+      attribute significantly increases cross-correlation profile near
+      the peak.}
+    \item{quality.cc} { A list giving the cross-correlation profile
+      after the inclusion of the tags from different quality bins }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/get.broad.enrichment.clusters.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,27 @@
+\name{get.broad.enrichment.clusters}
+\alias{get.broad.enrichment.clusters}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Determine broad clusters of enrichment }
+\description{
+  Scan chromosomes with a pre-defined window size, comparing scaled ChIP
+  and input tag counts to see if their ratio exceeds that expected from
+  a Poisson process (normalized for dataset size).
+}
+\usage{
+get.broad.enrichment.clusters(chip.tags, input.tags, window.size=1e3,z.thr=3,tag.shift=146/2)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{chip.tags}{ foreground tag vector list }
+  \item{input.tags}{ background tag vector list }
+  \item{window.size}{ window size to be used for tag counting }
+  \item{z.thr}{ Z-score to be used as a significance threshold }
+  \item{tag.shift}{ number of base pairs by which positive and negative
+    tag coordinates should be shifted towards each other (half of binding
+  peak separation distance)}
+}
+\value{
+  A list of elements corresponding to chromosomes, with each element
+  being an $s/$e/$rv data.frame giving the starting, ending positions and the log2
+  enrichment estimate for that region.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/get.conservative.fold.enrichment.profile.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,59 @@
+\name{get.conservative.fold.enrichment.profile}
+\alias{get.conservative.fold.enrichment.profile}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Estimate minimal fold enrichment/depletion along the chromosomes }
+\description{
+  The method provides a statistical assessment of enrichment/depletion
+  along the chromosomes. To assess tag density enrichment/depletion, a
+  sliding window of a specified size (\code{fws}) is used to calculate
+  the density of the foreground tags (\code{ftl}). Multiple, typically
+  larger windows are used to estimate background tag (\code{btl}) density around the
+  same location. The densities are compared as ratios of two Poisson
+  processes to estimate lower bound of foreground enrichment, or upper
+  bound of foreground depletion. If multiple window sizes were used to
+  estimate the background tag density, the most conservative one is
+  chosen for each point. 
+}
+\usage{
+get.conservative.fold.enrichment.profile(ftl, btl, fws, bwsl = c(1, 5, 25, 50) * fws, step = 50, tag.shift = 146/2, alpha = 0.05, use.most.informative.scale = F, quick.calculation = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{ftl}{ foreground tag vector list }
+  \item{btl}{ background tag vector list }
+  \item{fws}{ foreground window size }
+  \item{bwsl}{ background window scales. The size(s) of background windows
+    will be \code{fws*bwsl}. }
+  \item{step}{ spacing between positions at which the
+    enrichment/depletion is evaluated }
+  \item{tag.shift}{ number of basepairs by which positive and negative
+    tag coordinates should be shifted towards each other (half of binding
+  peak separation distance)}
+  \item{alpha}{ desired level of statistical significance }
+  \item{use.most.informative.scale}{ for each position, instead of
+    evaluating enrichment ratio bounds for all background window scales,
+    choose the one with the highest observed density to speed up the calculations}
+  \item{quick.calculation}{ Use square root transformation method
+    instead of a Bayesian method. This speeds up the calculation
+    considerably and is turned on by default. }
+  \item{background.density.scaling}{ If TRUE, regions of significant tag
+    enrichment will be masked out when calculating size ratio of the
+    signal to control datasets (to estimate ratio of the background tag
+    density). If FALSE, the dataset ratio will be equal to the ratio of
+    the number of tags in each dataset.}
+}
+\value{
+  A list of elements corresponding to chromosomes, with each element
+  being an $x/$y data.frame giving the position and the log2
+  conservative estimate of enrichment/depletion fold ratios around that
+  position. 
+  Use \code{\link{writewig}} to output the structure to a WIG
+  file.
+}
+\references{ R.M.Price, D.G. Bonett "Estimating the ratio of two Poisson
+  rates", Comp. Stat & Data Anal. 32(2000) 345}
+\seealso{ \code{\link{get.smoothed.tag.density}} }
+\examples{
+  enrichment.estimates <- get.conservative.fold.enrichment.profile(chip.data,input.data,fws=2*binding.characteristics$whs,step=100,alpha=0.01);
+  writewig(enrichment.estimates,"example.enrichment.estimates.wig","Example conservative fold-enrichment/depletion estimates shown on log2 scale");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/get.mser.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,46 @@
+\name{get.mser}
+\alias{get.mser}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate minimal saturated enrichment fold ratio }
+\description{
+  Determine if the dataset has reached absolute saturation, or otherwise
+  find minimal fold enrichment ratio above which the detection of peaks
+  has stabilized enough to meet the saturation criteria.
+}
+\usage{
+get.mser(signal.data, control.data, n.chains = 5, step.size = 1e+05, chains = NULL, cluster = NULL, test.agreement = 0.99, return.chains = F, enrichment.background.scales = c(1), n.steps = 1, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{signal.data}{ signal tag vector list }
+  \item{control.data}{ control tag vector list }
+  \item{n.chains}{ number of dataset subsamples to use }
+  \item{step.size}{ subsampling step describing the saturation
+    criteria. The criteria requires the set of detected binding sites to
+    be stable (as described by the \code{test.agreement} param) when the
+    number of tags in the dataset is reduced by \code{step.size}. The
+    value can either be an integer above one, in which case it specifies a fixed
+    number of tags, or a real value below one, in which case it
+    specifies the fraction of tags that should be removed (e.g. 0.1 will
+    remove 10\% of tags).
+  }
+  \item{test.agreement}{ Fraction of the detected peaks that should
+    agree between the full and subsampled datasets. }
+  \item{chains}{ optional parameter, giving pre-calculated chains }
+  \item{cluster}{ optional \code{snow} cluster to parallelize processing }
+
+  \item{return.chains}{ whether subsampled dataset results should be returned as
+  well }
+  \item{enrichment.background.scales}{ one or multiple window scales at
+  which the background tag density should be assessed. See
+  \code{enrichment.background.scales} in
+  \code{\link{find.binding.positions}}. If multiple scales are provided,
+  multiple MSER estimates will be returned.}
+  \item{\dots}{ additional parameters should be the same as those passed
+  to the \code{\link{find.binding.positions}}}
+}
+\value{
+  A single, or multiple (if multiple \code{enrichment.background.scales} were
+  provided) MSER value. A value of 1 or very close to it implies that
+  the dataset has reached absolute saturation based on the given criteria.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/get.mser.interpolation.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,56 @@
+\name{get.mser.interpolation}
+\alias{get.mser.interpolation}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Interpolate MSER dependency on the tag count }
+\description{
+  MSER generally decreases with increasing sequencing depth. This
+  function interpolates the dependency of MSER on tag counts as a
+  log-log linear function. The log-log fit is used to estimate the depth
+  of sequencing required to reach desired \code{target.fold.enrichment}.
+}
+\usage{
+get.mser.interpolation(signal.data, control.data, target.fold.enrichment = 5, n.chains = 10, n.steps = 6, step.size = 1e+05, chains = NULL, test.agreement = 0.99, return.chains = F, enrichment.background.scales = c(1), excluded.steps = c(seq(2, n.steps - 2)), ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{signal.data}{ signal chromosome tag vector list }
+  \item{control.data}{ control chromosome tag vector list }
+  \item{target.fold.enrichment}{ target MSER for which the depth should
+    be estimated}
+  \item{n.steps}{ number of steps in each subset chain. }
+  \item{step.size}{ Either number of tags or fraction of the dataset
+    size, see \code{step.size} parameter for \code{\link{get.mser}}. }
+  \item{test.agreement}{ Fraction of the detected peaks that should
+    agree between the full and subsampled datasets. See \code{test.agreement} parameter for \code{\link{get.mser}}}
+  \item{n.chains}{ number of random subset chains }
+  \item{chains}{ optional structure of pre-calculated chains
+    (e.g. generated by an earlier call with \code{return.chains=T}).}
+  
+  \item{return.chains}{ whether to return peak predictions calculated on
+  random chains. These can be passed back using \code{chains} argument
+  to skip subsampling/prediction steps, and just recalculate the depth
+  estimate for a different MSER.}
+  \item{enrichment.background.scales}{ see \code{enrichment.background.scales} parameter for \code{\link{get.mser}} }
+  \item{excluded.steps}{ Intermediate subsampling steps that should be excluded from
+  the chains to speed up the calculation. By default, all intermediate
+  steps except for first two and last two are skipped. Adding
+  intermediate steps improves interpolation at the expense of
+  computational time.}
+  \item{\dots}{ additional parameters are passed to \code{\link{get.mser}} }
+}
+\details{
+  To simulate sequencing growth, the method calculates peak predictions
+  on random chains. Each chain is produced by sequential random
+  subsampling of the original data. The number of steps in the chain
+  indicates how many times the random subsampling will be performed.
+}
+\value{
+  Normally returns a list, specifying for each background scale:
+  \item{prediction}{estimated sequencing depth required to reach
+  specified target MSER}
+  \item{log10.fit}{linear fit model, a result of \code{lm()} call}
+
+  If \code{return.chains=T}, the above structure is returned under
+  \code{interpolation} field, along with \code{chains} field containing
+  results of \code{\link{find.binding.positions}} calls on subsampled chains.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/get.smoothed.enrichment.mle.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,35 @@
+\name{get.smoothed.enrichment.mle}
+\alias{get.smoothed.enrichment.mle}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate chromosome-wide profiles of smoothed enrichment estimate }
+\description{
+  Given signal and control tag positions, the method calculates log2
+  signal to control enrichment estimates (maximum likelihood) for each
+  chromosome, based on the smoothed tag density profile (see \link{get.smoothed.tag.density}).
+}
+\usage{
+get.smoothed.enrichment.mle(signal.tags, control.tags, bandwidth = 150,tag.shift = 146/2, step = 50)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output
+    of \code{\link{select.informative.tags}}) }
+  \item{control.tags}{ control (input) tags }
+  \item{pseudocount}{ pseudocount value to be added to tag density -
+    defaults to 1 }
+  other parameters (such as bandwidth, step and tag.shift) are
+  passed to \link{get.smoothed.tag.density} - see appropriate reference
+  for details.
+}
+\value{
+  A list of elements corresponding to chromosomes, with each element
+  being an $x/$y data.frame giving the position and associated
+  log2 signal/control enrichment estimate. 
+}
+\seealso{ \code{\link{writewig}} }
+\examples{
+  # get smoothed enrichment estimate profile using 500bp bandwidth at
+  # 50bp steps
+  smoothed.M <- get.smoothed.enrichment.mle(chip.data,bandwidth=500,step=50);
+  writewig(smoothed.M,"example.smoothedM.wig","Example smoothed log2 intensity ratio estimate");
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/get.smoothed.tag.density.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,45 @@
+\name{get.smoothed.tag.density}
+\alias{get.smoothed.tag.density}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Calculate chromosome-wide profiles of smoothed tag density }
+\description{
+  Given tag positions, the method calculates for each chromosome a tag
+  density profile, smoothed by the Gaussian kernel. If the optional
+  control tags are provided, the difference between ChIP and control tag
+  density is returned.
+}
+\usage{
+get.smoothed.tag.density(signal.tags, control.tags = NULL, bandwidth = 150, bg.weight = NULL, tag.shift = 146/2, step = round(bandwidth/3))
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output
+    of \code{\link{select.informative.tags}}) }
+  \item{control.tags}{ optional control (input) tags }
+  \item{bandwidth}{ standard deviation of the Gaussian kernel }
+  \item{bg.weight}{ optional weight by which the background density
+    should be multiplied for scaling. If not supplied, the weight is
+    calculated based on the ratio of the reduced ChIP to input dataset sizes. }
+  \item{tag.shift}{ Distance by which the positive and negative strand
+    tags should be shifted towards each other. This
+    normally corresponds to the half of the cross-correlation peak
+    position (e.g. \code{get.binding.characteristics()}$peak$x/2) }
+  \item{step}{ The distance between the regularly spaced points for
+    which the values should be calculated. }
+  \item{background.density.scaling}{ If TRUE, regions of significant tag
+    enrichment will be masked out when calculating size ratio of the
+    signal to control datasets (to estimate ratio of the background tag
+    density). If FALSE, the dataset ratio will be equal to the ratio of
+    the number of tags in each dataset.}
+}
+\value{
+  A list of elements corresponding to chromosomes, with each element
+  being an $x/$y data.frame giving the position and associated tag
+  density. Use \code{\link{writewig}} to output the structure to a WIG
+  file.
+}
+\seealso{ \code{\link{writewig}} }
+\examples{
+  smoothed.density <- get.smoothed.tag.density(chip.data,control.tags=input.data,bandwidth=200,step=100,tag.shift=round(binding.characteristics$peak$x/2));
+  writewig(smoothed.density,"example.density.wig","Example smoothed, background-subtracted tag density");
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/output.binding.results.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,24 @@
+\name{output.binding.results}
+\alias{output.binding.results}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Write out determined binding peaks into a text file table }
+\description{
+  Writes out determined binding positions into a text file. The file
+  will contain a table with each row corresponding to a detected
+  position, with the following columns: 
+  \item{chr}{ chromosome or target sequence }
+  \item{pos}{ position of detected binding site on the chromosome/sequence}
+  \item{score}{a score reflecting magnitude of the binding}
+  \item{Evalue}{E-value corresponding to the peak magnitude}
+  \item{FDR}{FDR corresponding to the peak magnitude}
+  \item{enrichment.lb}{lower bound of the fold-enrichment ratio}
+  \item{enrichment.mle}{maximum likelihood estimate of the fold-enrichment ratio}
+}
+\usage{
+output.binding.results(results, filename)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{results}{ output of the \code{\link{find.binding.positions}} }
+  \item{filename}{ file name }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/read.bam.tags.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,24 @@
+\name{read.bam.tags}
+\alias{read.bam.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read BAM alignment file }
+\description{
+  Reads in aligned reads from BAM file. Note: no split (non-unique)
+  alignments should be reported in the BAM file.
+}
+\usage{
+read.bam.tags(filename, read.tag.names = F, fix.chromosome.names = F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{filename}{ BAM file }
+  \item{read.tag.names}{ Whether the tag names should be read in }
+  \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+    the sequence names }
+}
+\value{
+  \item{tags }{ A vector of 5' tag coordinates, with negative values
+    corresponding to tags mapped to the negative strand. }
+  \item{quality }{ Number of mismatches }
+  \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/read.bin.maqmap.tags.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,23 @@
+\name{read.bin.maqmap.tags}
+\alias{read.bin.maqmap.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read MAQ binary alignment map file }
+\description{
+  Reads in MAQ binary map alignment result file
+}
+\usage{
+read.bin.maqmap.tags(filename, read.tag.names = F, fix.chromosome.names = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{filename}{ MAQ map output file (binary) }
+  \item{read.tag.names}{ Whether the tag names should be read in }
+  \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+    the sequence names }
+}
+\value{
+  \item{tags }{ A vector of 5' tag coordinates, with negative values
+    corresponding to tags mapped to the negative strand. }
+  \item{quality }{ Number of mismatches }
+  \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/read.bowtie.tags.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,23 @@
+\name{read.bowtie.tags}
+\alias{read.bowtie.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read bowtie text alignment output file }
+\description{
+  Reads in bowtie alignment results in text format
+}
+\usage{
+read.bowtie.tags(filename, read.tag.names = F, fix.chromosome.names = F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{filename}{ bowtie text output file }
+  \item{read.tag.names}{ Whether the tag names should be read in }
+  \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+    the sequence names }
+}
+\value{
+  \item{tags }{ A vector of 5' tag coordinates, with negative values
+    corresponding to tags mapped to the negative strand. }
+  \item{quality }{ Number of mismatches }
+  \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/read.eland.tags.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,30 @@
+\name{read.eland.tags}
+\alias{read.eland.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read eland output file }
+\description{
+  Reads in ELAND output file, returning 5'-end tag coordinates and
+  number of mismatches associated with each mapped tag.
+}
+\usage{
+read.eland.tags(filename, read.tag.names = F, fix.chromosome.names = T, max.eland.tag.length = -1,extended=F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{filename}{ ELAND output file }
+  \item{read.tag.names}{ Whether the tag names should be read in }
+  \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+    the sequence names }
+  \item{max.eland.tag.length}{ Specifies max length of the tag sequence
+    considered by ELAND. This needs to be specified if the tags are
+    longer than the sequences considered by ELAND during alignment. }
+  \item{extended}{ Whether the file is written out in "extended" format
+    provided in GA pipeline 1.0. }
+  \item{multi}{ Whether the file is written in "multi" format, showing multiple alignments of the reads }
+}
+\value{
+  \item{tags }{ A vector of 5' tag coordinates, with negative values
+    corresponding to tags mapped to the negative strand. }
+  \item{quality }{ Number of mismatches }
+  \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/read.maqmap.tags.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,23 @@
+\name{read.maqmap.tags}
+\alias{read.maqmap.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read MAQ text alignment output file }
+\description{
+  Reads in MAQ alignment results in text format (that results from "maq mapview" command.)
+}
+\usage{
+read.maqmap.tags(filename, read.tag.names = F, fix.chromosome.names = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{filename}{ MAQ text output file }
+  \item{read.tag.names}{ Whether the tag names should be read in }
+  \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of
+    the sequence names }
+}
+\value{
+  \item{tags }{ A vector of 5' tag coordinates, with negative values
+    corresponding to tags mapped to the negative strand. }
+  \item{quality }{ Number of mismatches }
+  \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/read.meland.tags.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,29 @@
+\name{read.meland.tags}
+\alias{read.meland.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Read modified BED tag alignment file that contains variable match
+  length information }
+\description{
+  Reads in an extended BED tag alignment file. An example line given  below:
+  \code{49 . U1 . 1 . . 23 chr2 -234567}
+  The line above specifies that a 23-bp portion of the tag with id 49 was
+  aligned with 1 mismatch to the negative strand of chr2 at position 234567.
+}
+\usage{
+read.meland.tags(filename, read.tag.names = F, fix.chromosome.names = T)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{filename}{ name of the extended BED file }
+  \item{read.tag.names}{ whether to read in tag names }
+  \item{fix.chromosome.names}{ whether to remove ".fa" from the sequence
+  name ends. }
+}
+\value{
+  \item{tags }{ A vector of 5' tag coordinates, with negative values
+    corresponding to tags mapped to the negative strand. }
+  \item{quality }{ Quality expressed as a float x.y, where x is
+    tag.length - aligned.tag.portion.length, and y is the number of
+    mismatches (must be less than 10). }
+  \item{names }{ Tag names, if \code{read.tag.names} was set }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/remove.local.tag.anomalies.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,46 @@
+\name{remove.local.tag.anomalies}
+\alias{remove.local.tag.anomalies}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Restrict or remove positions with too many tags relative to
+  local background. }
+\description{
+  In Solexa ChIP-seq experiments some anomalous positions contain
+  extremely high number of tags at the exact coordinates. The function
+  scans the chromosomes, determining local tag density based on a
+  provided \code{window.size}, doing two types of corrections:
+  1. removing all tags from positions that exceed local density by
+  \code{eliminate.fold}; 2. reducing the tag count at positions
+  exceeding \code{cap.fold} to the maximal allowed count. The
+  statistical significance of counts exceeding either of these two
+  threshold densities is calculated based on Poisson model, with
+  confidence interval determined by the \code{z.threshold} Z-score parameter.
+}
+\usage{
+remove.local.tag.anomalies(tags, window.size = 200, eliminate.fold = 10, cap.fold = 4, z.threshold = 3)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{tags}{ Chromosome-list of tag vectors }
+  \item{window.size}{ Size of the window used to assess local
+    density. Increasing the window size considerably beyond the size of
+    the binding features will result in flattened profiles, with bound
+    positions exhibiting a difference of just 1 tag beyond the background. }
+  \item{eliminate.fold}{ Threshold defining fold-over background
+    density above which the position is considered anomalous and removed
+  completely.}
+  \item{cap.fold}{ Threshold fold-over background density above which
+    the position is capped to the maximum statistically likely given
+    local tag density }
+  \item{z.threshold}{ Z-score used to assess significance of a given
+    position exceeding either of the two density thresholds. }
+}
+\value{
+  A modified chromosome-wise tag vector list.
+}
+\references{ ~put references to the literature/web site here ~ }
+
+\note{
+  Increasing window.size to very large values will result in flat
+  profiles similar to those described by Zhang et al. "Model-based
+  Analysis of ChIP-Seq (MACS)." Genome Biol. 2008 Sep 17;9(9):R137.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/select.informative.tags.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,29 @@
+\name{select.informative.tags}
+\alias{select.informative.tags}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Choose informative tags }
+\description{
+  For datasets with tag alignment quality information (e.g. number of
+  mismatches for Eland alignments),
+  \code{\link{get.binding.characteristics}} determines whether inclusion
+  of tags from each specific quality bin improves the cross-correlation
+  profile. The present function is then used to actually select these
+  informative tags, discarding all other information, including quality
+  scores that are not used in further processing.
+}
+\usage{
+select.informative.tags(data, binding.characteristics)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{data}{ Full alignment data (a list with $tags and $quality elements) }
+  \item{binding.characteristics}{ result of a
+    \code{\link{get.binding.characteristics}}  call. If NULL value is
+    supplied, all tags will be accepted. }
+}
+\value{
+  A chromosome-wise tag list. Each element of the list corresponds to a
+  chromosome and is a numeric vector of 5' tag coordinates, with sign
+  designating DNA strand.
+  This form of tag data is used for most of the other processing.
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/spp-package.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,144 @@
+\name{spp-package}
+\alias{spp-package}
+\alias{spp}
+\docType{package}
+\title{
+ChIP-seq (Solexa) Processing Pipeline
+}
+\description{
+A set of routines for reading short sequence alignments, calculating tag
+density, estimates of statistically significant enrichment/depletion
+along the chromosome, identifying point binding positions (peaks), and
+characterizing saturation properties related to sequencing depth.
+}
+\details{
+\tabular{ll}{
+Package: \tab spp\cr
+Type: \tab Package\cr
+Version: \tab 1.8\cr
+Date: \tab 2008-11-14\cr
+License: \tab What license is it under?\cr
+LazyLoad: \tab yes\cr
+}
+See example below for typical processing sequence.
+}
+\author{Peter Kharchenko <peter.kharchenko@post.harvard.edu>}
+\references{
+Kharchenko P., Tolstorukov M., Park P. "Design and analysis of ChIP-seq
+experiments for DNA-binding proteins." Nature Biotech. doi:10.1038/nbt.1508
+}
+
+\examples{
+
+  # load the library
+  library(spp);
+
+  ## The following section shows how to initialize a cluster of 2 nodes for parallel processing
+  ## To enable parallel processing, uncomment the next three lines, and comment out "cluster<-NULL";
+  ## see "snow" package manual for details.
+  #library(snow)
+  #cluster <- makeCluster(2);
+  #invisible(clusterCall(cluster,source,"routines.r"));
+  cluster <- NULL;
+
+
+  
+  # read in tag alignments
+  chip.data <- read.eland.tags("chip.eland.alignment");
+  input.data <- read.eland.tags("input.eland.alignment");
+  
+  # get binding info from cross-correlation profile
+  # srange gives the possible range for the size of the protected region;
+  # srange should be higher than tag length; making the upper boundary too high will increase calculation time
+  #
+  # bin - bin tags within the specified number of basepairs to speed up calculation;
+  # increasing bin size decreases the accuracy of the determined parameters
+  binding.characteristics <- get.binding.characteristics(chip.data,srange=c(50,500),bin=5,cluster=cluster);
+
+
+  # plot cross-correlation profile
+  pdf(file="example.crosscorrelation.pdf",width=5,height=5)
+  par(mar = c(3.5,3.5,1.0,0.5), mgp = c(2,0.65,0), cex = 0.8);
+  plot(binding.characteristics$cross.correlation,type='l',xlab="strand shift",ylab="cross-correlation");
+  abline(v=binding.characteristics$peak$x,lty=2,col=2)
+  dev.off();
+  
+  # select informative tags based on the binding characteristics
+  chip.data <- select.informative.tags(chip.data,binding.characteristics);
+  input.data <- select.informative.tags(input.data,binding.characteristics);
+
+  # restrict or remove positions with anomalous number of tags relative
+  # to the local density
+  chip.data <- remove.local.tag.anomalies(chip.data);
+  input.data <- remove.local.tag.anomalies(input.data);
+
+
+  # output smoothed tag density (subtracting re-scaled input) into a WIG file
+  # note that the tags are shifted by half of the peak separation distance
+  smoothed.density <- get.smoothed.tag.density(chip.data,control.tags=input.data,bandwidth=200,step=100,tag.shift=round(binding.characteristics$peak$x/2));
+  writewig(smoothed.density,"example.density.wig","Example smoothed, background-subtracted tag density");
+  rm(smoothed.density);
+
+  # output conservative enrichment estimates
+  # alpha specifies significance level at which confidence intervals will be estimated
+  enrichment.estimates <- get.conservative.fold.enrichment.profile(chip.data,input.data,fws=2*binding.characteristics$whs,step=100,alpha=0.01);
+  writewig(enrichment.estimates,"example.enrichment.estimates.wig","Example conservative fold-enrichment/depletion estimates shown on log2 scale");
+  rm(enrichment.estimates);
+
+
+  # binding detection parameters
+  # desired FDR. Alternatively, an E-value can be supplied to the method calls below instead of the fdr parameter
+  fdr <- 1e-2; 
+  # the binding.characteristics contains the optimized half-size for binding detection window
+  detection.window.halfsize <- binding.characteristics$whs;
+  
+  # determine binding positions using wtd method
+  bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize,cluster=cluster)
+
+  # alternatively determine binding positions using lwcc method (note: this takes longer than wtd)
+  # bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=fdr,method=tag.lwcc,whs=detection.window.halfsize,cluster=cluster)
+  
+  print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks"));
+  
+  # output detected binding positions
+  output.binding.results(bp,"example.binding.positions.txt");
+
+
+  # ------------------------------------------------------------------------------------------- 
+  # the set of commands in the following section illustrates methods for saturation analysis
+  # these are separated from the previous section, since they are highly CPU intensive
+  # ------------------------------------------------------------------------------------------- 
+
+  # determine MSER
+  # note: this will take approximately 10-15x the amount of time the initial binding detection did
+  # The saturation criteria here is 0.99 consistency in the set of binding positions when adding 1e5 tags.
+  # To ensure convergence the number of subsampled chains (n.chains) should be higher (80)
+  mser <- get.mser(chip.data,input.data,step.size=1e5,test.agreement=0.99,n.chains=8,cluster=cluster,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize)
+  
+  print(paste("MSER at a current depth is",mser));
+  
+  # note: an MSER value of 1 or very near one implies that the set of detected binding positions satisfies saturation criteria without
+  # additional selection by fold-enrichment ratios. In other words, the dataset has reached saturation in a traditional sense (absolute saturation).
+
+  # interpolate MSER dependency on tag count
+  # note: this requires considerably more calculations than the previous steps (~ 3x more than the first MSER calculation)
+  # Here we interpolate MSER dependency to determine a point at which MSER of 2 is reached
+  # The interpolation will be based on the difference in MSER at the current depth, and a depth at 5e5 fewer tags (n.steps=6);
+  # evaluation of the intermediate points is omitted here to speed up the calculation (excluded.steps parameter)
+  # A total of 7 chains is used here to speed up calculation, whereas a higher number of chains (50) would give good convergence
+  msers <- get.mser.interpolation(chip.data,input.data,step.size=1e5,test.agreement=0.99, target.fold.enrichment=2, n.chains=7,n.steps=6,excluded.steps=c(2:4),cluster=cluster,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize)
+
+  print(paste("predicted sequencing depth =",round(unlist(lapply(msers,function(x) x$prediction))/1e6,5)," million tags"))
+  
+
+  # note: the interpolation will return NA prediction if the dataset has reached absolute saturation at the current depth.
+  # note: use return.chains=T to also return the calculated random chains (under the msers$chains field) - these can be passed back as
+  #       "get.mser.interpolation( ..., chains=msers$chains)" to calculate predictions for another target.fold.enrichment value
+  #        without having to recalculate the random chain predictions.
+
+  ## stop cluster if it was initialized
+  #stopCluster(cluster);  
+
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/write.broadpeak.info.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,16 @@
+\name{write.broadpeak.info}
+\alias{write.broadpeak.info}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Write out determined broad enrichment regions using broadPeak format }
+\description{
+  Writes out broad regions of enrichment determined by the
+  get.broad.enrichment.clusters method in a broadPeak format.
+}
+\usage{
+write.broadpeak.info(broadpeak.results, filename)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{broadpeak.results}{ output of the \code{\link{get.broad.enrichment.clusters}} }
+  \item{filename}{ file name }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/write.narrowpeak.binding.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,21 @@
+\name{write.narrowpeak.binding}
+\alias{write.narrowpeak.binding}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Write out determined binding peaks using narrowPeak format }
+\description{
+  Writes out determined binding positions into a narrowPeak file.
+  The region will correspond to associated broad enrichment region, if
+  such were added using add.broad.peak.regions method. Otherwise the
+  region size will be determined using margin (which defaults to the
+  window half size that was used to determine binding positions)
+}
+\usage{
+write.narrowpeak.binding(results, filename,margin=results$whs)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{results}{ output of the \code{\link{find.binding.positions}} }
+  \item{filename}{ file name }
+  \item{margin}{ explicit value of the margin to be used if the broad
+    region information is absent (defaults to peak detection window half-size)}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/man/writewig.Rd	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,31 @@
+\name{writewig}
+\alias{writewig}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ A function to save a list of chromosome-wise x/y data frames
+  into a WIG file format. }
+\description{
+  Takes a list that contains an $x and $y data.frame for a number of
+  chromosomes and writes it out to a WIG BED style format.
+}
+\usage{
+writewig(dat, fname, feature, threshold = 5, zip = F)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{dat}{ Chromosome coordinate-value data. \code{dat} is a list,
+    each member of a list is a data frame with $x and $y columns
+    containing chromosome positions and associated values. The names of
+    the list elements correspond to the chromosomes. }
+  \item{fname}{ Filename to which the output should be written }
+  \item{feature}{ Data description to be incorporated into the WIG header }
+  \item{threshold}{ Optional threshold to be saved in the WIG file}
+  \item{zip}{ Whether to invoke a zip program to compress the file }
+}
+
+\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ }
+\examples{
+
+data <- list("chr1"=data.frame(x=c(100,130,200),y=c(1.2,4.0,2.3)));
+writewig(data,"filename");
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BGZF.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,398 @@
+// ***************************************************************************
+// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading & writing BGZF files
+// ***************************************************************************
+
+#include <BGZF.h>
+using namespace BamTools;
+
+#include <algorithm>
+using namespace std;
+
+BgzfData::BgzfData(void) :
+    UncompressedBlockSize(DEFAULT_BLOCK_SIZE),
+    CompressedBlockSize(MAX_BLOCK_SIZE),
+    BlockLength(0),
+    BlockOffset(0),
+    BlockAddress(0),
+    IsOpen(false),
+    IsWriteOnly(false),
+    IsWriteUncompressed(false),
+    Stream(NULL),
+    UncompressedBlock(NULL),
+    CompressedBlock(NULL)
+{   // allocate both block buffers up front; an allocation failure is fatal
+    try {
+        CompressedBlock   = new char[CompressedBlockSize];
+        UncompressedBlock = new char[UncompressedBlockSize];
+    } catch ( const std::bad_alloc& ) {
+        fprintf(stderr, "BGZF ERROR: unable to allocate memory for our BGZF object.\n");
+        exit(1);
+    }
+}
+
+// destructor: frees both block buffers (delete[] on a NULL pointer is a no-op)
+BgzfData::~BgzfData(void) {
+    delete[] CompressedBlock;
+    delete[] UncompressedBlock;
+}
+
+// closes BGZF file; in write mode, first flushes pending data and appends the EOF marker
+void BgzfData::Close(void) {
+
+    // nothing to do if the file is not open
+    if ( !IsOpen ) return;
+
+    // if writing to file, flush the current BGZF block, then write an empty
+    // block (as EOF marker); a short write is reported instead of being ignored
+    if ( IsWriteOnly ) {
+        FlushBlock();
+        const int blockLength = DeflateBlock();
+        if ( fwrite(CompressedBlock, 1, blockLength, Stream) != (size_t)blockLength )
+            fprintf(stderr, "BGZF ERROR: unable to write EOF marker block.\n");
+    }
+
+    // close the stream (fclose flushes any buffered output) and report a failure
+    if ( fclose(Stream) != 0 )
+        fprintf(stderr, "BGZF ERROR: unable to close file.\n");
+
+    // drop the now-invalid handle so a later Open() cannot mistake it for a live stream
+    Stream = NULL;
+    IsWriteUncompressed = false;
+    IsOpen = false;
+}
+
+// compresses the pending data in UncompressedBlock into CompressedBlock as one BGZF block;
+// returns the total block length (header + deflate data + footer), exits on zlib failure.
+// Input that did not fit into this block is moved to the start of UncompressedBlock and
+// BlockOffset is set to its size.
+int BgzfData::DeflateBlock(void) {
+
+    // initialize the gzip header
+    char* buffer = CompressedBlock;
+    memset(buffer, 0, BLOCK_HEADER_LENGTH);
+    buffer[0]  = GZIP_ID1;
+    buffer[1]  = (char)GZIP_ID2;
+    buffer[2]  = CM_DEFLATE;
+    buffer[3]  = FLG_FEXTRA;
+    buffer[9]  = (char)OS_UNKNOWN;
+    buffer[10] = BGZF_XLEN;
+    buffer[12] = BGZF_ID1;
+    buffer[13] = BGZF_ID2;
+    buffer[14] = BGZF_LEN;
+
+    // set compression level (0 when the file was opened for uncompressed writing)
+    const int compressionLevel = ( IsWriteUncompressed ? 0 : Z_DEFAULT_COMPRESSION );
+
+    // loop to retry for blocks that do not compress enough
+    int inputLength = BlockOffset;
+    int compressedLength = 0;
+    unsigned int bufferSize = CompressedBlockSize;
+
+    while ( true ) {
+
+        // initialize zstream values
+        z_stream zs;
+        zs.zalloc    = NULL;
+        zs.zfree     = NULL;
+        zs.next_in   = (Bytef*)UncompressedBlock;
+        zs.avail_in  = inputLength;
+        zs.next_out  = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];
+        zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
+
+        // initialize the zlib compression algorithm
+        if ( deflateInit2(&zs, compressionLevel, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK ) {
+            fprintf(stderr, "BGZF ERROR: zlib deflate initialization failed.\n");
+            exit(1);
+        }
+
+        // compress the data
+        int status = deflate(&zs, Z_FINISH);
+        if ( status != Z_STREAM_END ) {
+
+            deflateEnd(&zs);
+
+            // Z_OK here means the output did not fit:
+            // reduce the input length and try again
+            if ( status == Z_OK ) {
+                inputLength -= 1024;
+                if( inputLength < 0 ) {
+                    fprintf(stderr, "BGZF ERROR: input reduction failed.\n");
+                    exit(1);
+                }
+                continue;
+            }
+
+            // any other status is a genuine deflate() failure
+            fprintf(stderr, "BGZF ERROR: zlib::deflate() failed.\n");
+            exit(1);
+        }
+
+        // finalize the compression routine
+        if ( deflateEnd(&zs) != Z_OK ) {
+            fprintf(stderr, "BGZF ERROR: zlib::deflateEnd() failed.\n");
+            exit(1);
+        }
+
+        compressedLength = zs.total_out;
+        compressedLength += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
+        if ( compressedLength > MAX_BLOCK_SIZE ) {
+            fprintf(stderr, "BGZF ERROR: deflate overflow.\n");
+            exit(1);
+        }
+
+        break;
+    }
+
+    // store the compressed length (the header field holds total block size - 1)
+    BgzfData::PackUnsignedShort(&buffer[16], (unsigned short)(compressedLength - 1));
+
+    // store the footer: CRC32 of the consumed input, then the consumed input size
+    unsigned int crc = crc32(0, NULL, 0);
+    crc = crc32(crc, (Bytef*)UncompressedBlock, inputLength);
+    BgzfData::PackUnsignedInt(&buffer[compressedLength - 8], crc);
+    BgzfData::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
+
+    // ensure that we have less than a block of data left
+    int remaining = BlockOffset - inputLength;
+    if ( remaining > 0 ) {
+        if ( remaining > inputLength ) {
+            fprintf(stderr, "BGZF ERROR: after deflate, remainder too large.\n");
+            exit(1);
+        }
+        // remaining <= inputLength, so source and destination cannot overlap
+        memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);
+    }
+
+    BlockOffset = remaining;
+    return compressedLength;
+}
+
+// writes out all pending bytes of the uncompressed buffer as compressed BGZF blocks
+void BgzfData::FlushBlock(void) {
+
+    // DeflateBlock() updates BlockOffset, so repeat until nothing is pending
+    while ( BlockOffset != 0 ) {
+
+        // pack the pending data into CompressedBlock
+        const int expected = DeflateBlock();
+
+        // push the compressed block to the output stream; a short write is fatal
+        const int written = fwrite(CompressedBlock, 1, expected, Stream);
+
+        if ( expected != written ) {
+            fprintf(stderr, "BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", expected, written);
+            exit(1);
+        }
+
+        BlockAddress += expected;
+    }
+}
+
+// inflates the block held in CompressedBlock into UncompressedBlock;
+// returns the number of uncompressed bytes produced, or -1 if any zlib call fails
+int BgzfData::InflateBlock(const int& blockLength) {
+
+    // the compressed payload starts right after the block header
+    z_stream inflater;
+    inflater.zalloc    = NULL;
+    inflater.zfree     = NULL;
+    inflater.next_in   = (Bytef*)CompressedBlock + BLOCK_HEADER_LENGTH;
+    inflater.avail_in  = blockLength - 16;
+    inflater.next_out  = (Bytef*)UncompressedBlock;
+    inflater.avail_out = UncompressedBlockSize;
+
+    // set up the decoder
+    if ( inflateInit2(&inflater, GZIP_WINDOW_BITS) != Z_OK ) {
+        fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n");
+        return -1;
+    }
+
+    // the whole block must decode in this single call
+    if ( inflate(&inflater, Z_FINISH) != Z_STREAM_END ) {
+        inflateEnd(&inflater);
+        fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflate() failed\n");
+        return -1;
+    }
+
+    // release zlib's internal state
+    if ( inflateEnd(&inflater) != Z_OK ) {
+        fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n");
+        return -1;
+    }
+
+    return inflater.total_out;
+}
+
+// opens the BGZF file (mode is either "rb" for reading, or "wb" for writing);
+// filename may also be "stdin" (reading only) or "stdout" (writing only).
+// Returns true on success, false (with a message on stderr) otherwise.
+bool BgzfData::Open(const string& filename, const char* mode, bool isWriteUncompressed ) {
+
+    // determine open mode
+    if ( strcmp(mode, "rb") == 0 )
+        IsWriteOnly = false;
+    else if ( strcmp(mode, "wb") == 0) 
+        IsWriteOnly = true;
+    else {
+        fprintf(stderr, "BGZF ERROR: unknown file mode: %s\n", mode);
+        return false; 
+    }
+
+    // ----------------------------------------------------------------
+    // open Stream to read from/write to file, stdin, or stdout
+    // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)
+    
+    // read/write BGZF data to/from a file
+    if ( (filename != "stdin") && (filename != "stdout") )
+        Stream = fopen(filename.c_str(), mode);
+    
+    // read BGZF data from stdin
+    else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) )
+        Stream = freopen(NULL, mode, stdin);
+    
+    // write BGZF data to stdout
+    else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) )
+        Stream = freopen(NULL, mode, stdout);
+
+    // unsupported combination (writing to "stdin" / reading from "stdout"):
+    // clear Stream so a stale handle from an earlier Open() is never reused
+    else
+        Stream = NULL;
+
+    if ( !Stream ) {
+        fprintf(stderr, "BGZF ERROR: unable to open file %s\n", filename.c_str() );
+        return false;
+    }
+    
+    // set flags, return success
+    IsOpen = true;
+    IsWriteUncompressed = isWriteUncompressed;
+    return true;
+}
+
+// copies up to dataLength decompressed bytes into data, loading blocks on demand;
+// returns the bytes copied (fewer than requested once no more data is available),
+// 0 if the file is not open for reading or dataLength is 0, -1 if a block cannot be read
+int BgzfData::Read(char* data, const unsigned int dataLength) {
+
+   if ( !IsOpen || IsWriteOnly || dataLength == 0 ) return 0;
+
+   char* cursor = data;
+   unsigned int copied = 0;
+   while ( copied < dataLength ) {
+
+       // current block exhausted (or none loaded yet): fetch the next one
+       int available = BlockLength - BlockOffset;
+       if ( available <= 0 ) {
+           if ( !ReadBlock() ) return -1;
+           available = BlockLength - BlockOffset;
+           if ( available <= 0 ) break;
+       }
+
+       // take whichever is smaller: what is still wanted or what the block holds
+       const int wanted = (int)(dataLength - copied);
+       const int chunk  = ( available < wanted ? available : wanted );
+       memcpy(cursor, UncompressedBlock + BlockOffset, chunk);
+
+       BlockOffset += chunk;
+       cursor      += chunk;
+       copied      += chunk;
+   }
+
+   // block fully consumed: record where the next block starts and reset the cursor
+   if ( BlockOffset == BlockLength ) {
+       BlockAddress = ftell64(Stream);
+       BlockOffset  = 0;
+       BlockLength  = 0;
+   }
+
+   return copied;
+}
+
+// reads the next BGZF block from Stream and inflates it into UncompressedBlock;
+// returns true on success (a clean end-of-file yields an empty block with BlockLength 0),
+// false on a truncated, malformed, or undecodable block
+bool BgzfData::ReadBlock(void) {
+
+    char    header[BLOCK_HEADER_LENGTH];
+    int64_t blockAddress = ftell64(Stream);
+    
+    int count = fread(header, 1, sizeof(header), Stream);
+    if ( count == 0 ) {
+        BlockLength = 0;
+        return true;
+    }
+
+    if ( count != sizeof(header) ) {
+        fprintf(stderr, "BGZF ERROR: read block failed - could not read block header\n");
+        return false;
+    }
+
+    if ( !BgzfData::CheckBlockHeader(header) ) {
+        fprintf(stderr, "BGZF ERROR: read block failed - invalid block header\n");
+        return false;
+    }
+
+    // the header stores (total block size - 1). Reject sizes that cannot hold the
+    // header plus footer, or that exceed the buffer: otherwise 'remaining' could go
+    // negative and the fread below would overrun CompressedBlock on a corrupt file
+    int blockLength = BgzfData::UnpackUnsignedShort(&header[16]) + 1;
+    if ( blockLength < BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH ||
+         (unsigned int)blockLength > CompressedBlockSize ) {
+        fprintf(stderr, "BGZF ERROR: read block failed - invalid block length\n");
+        return false;
+    }
+
+    char* compressedBlock = CompressedBlock;
+    memcpy(compressedBlock, header, BLOCK_HEADER_LENGTH);
+    int remaining = blockLength - BLOCK_HEADER_LENGTH;
+
+    count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);
+    if ( count != remaining ) {
+        fprintf(stderr, "BGZF ERROR: read block failed - could not read data from block\n");
+        return false;
+    }
+
+    count = InflateBlock(blockLength);
+    if ( count < 0 ) { 
+      fprintf(stderr, "BGZF ERROR: read block failed - could not decompress block data\n");
+      return false;
+    }
+
+    // BlockLength == 0 here means BlockOffset may hold an offset set by Seek(),
+    // which must be kept; otherwise start at the beginning of the new block
+    if ( BlockLength != 0 )
+        BlockOffset = 0;
+
+    BlockAddress = blockAddress;
+    BlockLength  = count;
+    return true;
+}
+
+// seek to a position in the BGZF file: the low 16 bits of 'position' are the offset
+// inside the block, the bits above them are the file address of the block
+bool BgzfData::Seek(int64_t position) {
+
+    if ( !IsOpen ) return false;
+
+    // move the file pointer to the start of the requested block
+    const int64_t fileAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
+    if ( fseek64(Stream, fileAddress, SEEK_SET) != 0 ) {
+        fprintf(stderr, "BGZF ERROR: unable to seek in file\n");
+        return false;
+    }
+
+    // no block is loaded yet (BlockLength = 0); remember the offset to apply inside it
+    BlockLength  = 0;
+    BlockAddress = fileAddress;
+    BlockOffset  = (int)(position & 0xFFFF);
+    return true;
+}
+
+// get file position in BGZF file: (block file address << 16) | offset within block
+int64_t BgzfData::Tell(void) {
+
+    // a file that is not open reports position 0
+    if ( !IsOpen ) return 0;
+    return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );
+}
+
+// buffers dataLen bytes for output, flushing each time the uncompressed block fills up;
+// returns the number of bytes accepted (0 when the file is not open for writing)
+unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) {
+
+    if ( !IsOpen || !IsWriteOnly ) return 0;
+
+    const unsigned int capacity = UncompressedBlockSize;
+    unsigned int accepted = 0;
+
+    // consume the input in chunks that fit the free space left in the block
+    while ( accepted < dataLen ) {
+
+        const unsigned int room    = capacity - BlockOffset;
+        const unsigned int pending = dataLen - accepted;
+        const unsigned int chunk   = ( pending < room ? pending : room );
+        memcpy(UncompressedBlock + BlockOffset, data + accepted, chunk);
+
+        BlockOffset += chunk;
+        accepted    += chunk;
+
+        // a full block is compressed and written out immediately
+        if ( BlockOffset == capacity )
+            FlushBlock();
+    }
+
+    return accepted;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BGZF.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,322 @@
+// ***************************************************************************
+// BGZF.h (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// BGZF routines were adapted from the bgzf.c code developed at the Broad
+// Institute.
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading & writing BGZF files
+// ***************************************************************************
+
+#ifndef BGZF_H
+#define BGZF_H
+
+#include <api_global.h>
+#include <zlib.h>
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+// Platform-specific large-file support
+#ifndef BAMTOOLS_LFS
+#define BAMTOOLS_LFS
+    #ifdef WIN32
+        #define ftell64(a)     _ftelli64(a)
+        #define fseek64(a,b,c) _fseeki64(a,b,c)
+    #else
+        #define ftell64(a)     ftello(a)
+        #define fseek64(a,b,c) fseeko(a,b,c) 
+    #endif
+#endif // BAMTOOLS_LFS
+
+// Platform-specific type definitions
+#ifndef BAMTOOLS_TYPES
+#define BAMTOOLS_TYPES
+    #ifdef _MSC_VER
+        typedef char                 int8_t;
+        typedef unsigned char       uint8_t;
+        typedef short               int16_t;
+        typedef unsigned short     uint16_t;
+        typedef int                 int32_t;
+        typedef unsigned int       uint32_t;
+        typedef long long           int64_t;
+        typedef unsigned long long uint64_t;
+    #else    
+        #include <stdint.h>
+    #endif
+#endif // BAMTOOLS_TYPES
+
+namespace BamTools {
+
+// zlib constants
+const int GZIP_ID1   = 31;
+const int GZIP_ID2   = 139;
+const int CM_DEFLATE = 8;
+const int FLG_FEXTRA = 4;
+const int OS_UNKNOWN = 255;
+const int BGZF_XLEN  = 6;
+const int BGZF_ID1   = 66;
+const int BGZF_ID2   = 67;
+const int BGZF_LEN   = 2;
+const int GZIP_WINDOW_BITS    = -15;
+const int Z_DEFAULT_MEM_LEVEL = 8;
+
+// BGZF constants
+const int BLOCK_HEADER_LENGTH = 18;
+const int BLOCK_FOOTER_LENGTH = 8;
+const int MAX_BLOCK_SIZE      = 65536;
+const int DEFAULT_BLOCK_SIZE  = 65536;
+
+// State and operations for reading or writing a single BGZF file through a C stream.
+struct API_EXPORT BgzfData {
+
+    // data members
+    public:
+        unsigned int UncompressedBlockSize;  // size in bytes of the UncompressedBlock buffer
+        unsigned int CompressedBlockSize;    // size in bytes of the CompressedBlock buffer
+        unsigned int BlockLength;            // bytes of inflated data in the block last read
+        unsigned int BlockOffset;            // current position within UncompressedBlock
+        uint64_t BlockAddress;               // file offset of the current block
+        bool     IsOpen;                     // true between a successful Open() and Close()
+        bool     IsWriteOnly;                // true when opened with mode "wb"
+        bool     IsWriteUncompressed;        // true selects compression level 0 when writing
+        FILE*    Stream;                     // underlying file, stdin, or stdout stream
+        char*    UncompressedBlock;          // buffer holding inflated (raw) data
+        char*    CompressedBlock;            // buffer holding one whole BGZF block
+
+    // constructor & destructor
+    public:
+        BgzfData(void);
+        ~BgzfData(void);
+
+    // main interface methods
+    public:       
+        // closes BGZF file
+        void Close(void);
+        // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
+        bool Open(const std::string& filename, const char* mode, bool isWriteUncompressed = false);
+        // reads BGZF data into a byte buffer; returns bytes read, or -1 if a block cannot be read
+        int Read(char* data, const unsigned int dataLength);
+        // seek to position in BGZF file (block file address << 16 | offset within block)
+        bool Seek(int64_t position);
+        // get file position in BGZF file (same encoding as accepted by Seek)
+        int64_t Tell(void);
+        // writes the supplied data into the BGZF buffer; returns bytes accepted
+        unsigned int Write(const char* data, const unsigned int dataLen);
+
+    // internal methods
+    private:
+        // compresses the current block; returns the compressed block length
+        int DeflateBlock(void);
+        // flushes the data in the BGZF block
+        void FlushBlock(void);
+        // de-compresses the current block; returns the inflated size, or -1 on failure
+        int InflateBlock(const int& blockLength);
+        // reads a BGZF block; returns false if the block cannot be read or decoded
+        bool ReadBlock(void);
+    
+    // static 'utility' methods
+    public:
+        // checks BGZF block header
+        static inline bool CheckBlockHeader(char* header);
+        // packs an unsigned integer into the specified buffer (least significant byte first)
+        static inline void PackUnsignedInt(char* buffer, unsigned int value);
+        // packs an unsigned short into the specified buffer (least significant byte first)
+        static inline void PackUnsignedShort(char* buffer, unsigned short value);
+        // unpacks a buffer into a double
+        static inline double UnpackDouble(char* buffer);
+        static inline double UnpackDouble(const char* buffer);
+        // unpacks a buffer into a float
+        static inline float UnpackFloat(char* buffer);
+        static inline float UnpackFloat(const char* buffer);
+        // unpacks a buffer into a signed int
+        static inline signed int UnpackSignedInt(char* buffer);
+        static inline signed int UnpackSignedInt(const char* buffer);
+        // unpacks a buffer into a signed short
+        static inline signed short UnpackSignedShort(char* buffer);
+        static inline signed short UnpackSignedShort(const char* buffer);
+        // unpacks a buffer into an unsigned int
+        static inline unsigned int UnpackUnsignedInt(char* buffer);
+        static inline unsigned int UnpackUnsignedInt(const char* buffer);
+        // unpacks a buffer into an unsigned short
+        static inline unsigned short UnpackUnsignedShort(char* buffer);
+        static inline unsigned short UnpackUnsignedShort(const char* buffer);
+};
+
+// -------------------------------------------------------------
+// static 'utility' method implementations
+
+// checks BGZF block header: returns true only if every fixed header field matches
+inline
+bool BgzfData::CheckBlockHeader(char* header) {
+    // gzip part: magic bytes, compression method, "extra field present" flag
+    if ( header[0] != GZIP_ID1 || header[1] != (char)GZIP_ID2 ) return false;
+    if ( header[2] != Z_DEFLATED ) return false;
+    if ( (header[3] & FLG_FEXTRA) == 0 ) return false;
+    // BGZF part: extra-field length, subfield identifier, subfield length
+    if ( BgzfData::UnpackUnsignedShort(&header[10]) != BGZF_XLEN ) return false;
+    if ( header[12] != BGZF_ID1 || header[13] != BGZF_ID2 ) return false;
+    return ( BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
+}
+
+// 'packs' an unsigned integer into buffer[0..3], least significant byte first
+inline
+void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
+    // one byte per iteration, shifting by 0, 8, 16 and 24 bits
+    for ( int i = 0; i < 4; ++i ) {
+        buffer[i] = (char)(value >> (8 * i));
+    }
+}
+
+// 'packs' an unsigned short into buffer[0..1], low byte first
+inline
+void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
+    buffer[0] = (char)(value & 0xFF);
+    buffer[1] = (char)((value >> 8) & 0xFF);
+}
+
+// 'unpacks' the first 8 bytes of a buffer into a double (non-const & const char* flavors);
+// bytes are copied in buffer order, so they are interpreted in the host's byte order
+inline
+double BgzfData::UnpackDouble(char* buffer) {
+    // same work as the const flavor
+    return BgzfData::UnpackDouble(static_cast<const char*>(buffer));
+}
+
+inline
+double BgzfData::UnpackDouble(const char* buffer) {
+    union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
+    un.value = 0;
+    for ( int i = 0; i < 8; ++i )
+        un.valueBuffer[i] = buffer[i];
+    return un.value;
+}
+
+// 'unpacks' the first 4 bytes of a buffer into a float (non-const & const char* flavors);
+// bytes are copied in buffer order, so they are interpreted in the host's byte order
+inline
+float BgzfData::UnpackFloat(char* buffer) {
+    // same work as the const flavor
+    return BgzfData::UnpackFloat(static_cast<const char*>(buffer));
+}
+
+inline
+float BgzfData::UnpackFloat(const char* buffer) {
+    union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
+    un.value = 0;
+    for ( int i = 0; i < 4; ++i )
+        un.valueBuffer[i] = buffer[i];
+    return un.value;
+}
+
+// 'unpacks' the first 4 bytes of a buffer into a signed int (non-const & const char* flavors);
+// bytes are copied in buffer order, so they are interpreted in the host's byte order
+inline
+signed int BgzfData::UnpackSignedInt(char* buffer) {
+    // same work as the const flavor
+    return BgzfData::UnpackSignedInt(static_cast<const char*>(buffer));
+}
+
+inline
+signed int BgzfData::UnpackSignedInt(const char* buffer) {
+    union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
+    un.value = 0;
+    for ( int i = 0; i < 4; ++i )
+        un.valueBuffer[i] = buffer[i];
+    return un.value;
+}
+
+// 'unpacks' the first 2 bytes of a buffer into a signed short (non-const & const char* flavors);
+// bytes are copied in buffer order, so they are interpreted in the host's byte order
+inline
+signed short BgzfData::UnpackSignedShort(char* buffer) {
+    // same work as the const flavor
+    return BgzfData::UnpackSignedShort(static_cast<const char*>(buffer));
+}
+
+inline
+signed short BgzfData::UnpackSignedShort(const char* buffer) {
+    union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
+    un.value = 0;
+    for ( int i = 0; i < 2; ++i )
+        un.valueBuffer[i] = buffer[i];
+    return un.value;
+}
+
+// 'unpacks' the first 4 bytes of a buffer into an unsigned int (non-const & const char* flavors);
+// bytes are copied in buffer order, so they are interpreted in the host's byte order
+inline
+unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
+    // same work as the const flavor
+    return BgzfData::UnpackUnsignedInt(static_cast<const char*>(buffer));
+}
+
+inline
+unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) {
+    union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
+    un.value = 0;
+    for ( int i = 0; i < 4; ++i )
+        un.valueBuffer[i] = buffer[i];
+    return un.value;
+}
+
+// 'unpacks' the first 2 bytes of a buffer into an unsigned short (non-const & const char* flavors);
+// bytes are copied in buffer order, so they are interpreted in the host's byte order
+inline
+unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
+    // same work as the const flavor
+    return BgzfData::UnpackUnsignedShort(static_cast<const char*>(buffer));
+}
+
+inline
+unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) {
+    union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
+    un.value = 0;
+    for ( int i = 0; i < 2; ++i )
+        un.valueBuffer[i] = buffer[i];
+    return un.value;
+}
+
+} // namespace BamTools
+
+#endif // BGZF_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamAlignment.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,696 @@
+// ***************************************************************************
+// BamAlignment.cpp (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 13 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the BamAlignment data structure
+// ***************************************************************************
+
+#include <BamAlignment.h>
+using namespace BamTools;
+
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <map>
+#include <utility>
+using namespace std;
+
+// default ctor
+// Gives every scalar member a defined starting value: reference IDs and
+// positions start at -1, all other scalars at 0. Length, Bin, MapQuality and
+// AlignmentFlag are zeroed explicitly because the SetIs*() methods OR/AND bits
+// into AlignmentFlag and the Is*() queries read it; on a default-constructed
+// alignment they would otherwise operate on an indeterminate value.
+// The initializers follow the same member order used by the copy ctor.
+BamAlignment::BamAlignment(void)
+    : Length(0)
+    , RefID(-1)
+    , Position(-1)
+    , Bin(0)
+    , MapQuality(0)
+    , AlignmentFlag(0)
+    , MateRefID(-1)
+    , MatePosition(-1)
+    , InsertSize(0)
+{ }
+
+// copy ctor
+// Member-by-member copy of 'other': character data, core alignment fields,
+// CIGAR operations, mate information and the support data block.
+BamAlignment::BamAlignment(const BamAlignment& other) :
+    Name(other.Name),
+    Length(other.Length),
+    QueryBases(other.QueryBases),
+    AlignedBases(other.AlignedBases),
+    Qualities(other.Qualities),
+    TagData(other.TagData),
+    RefID(other.RefID),
+    Position(other.Position),
+    Bin(other.Bin),
+    MapQuality(other.MapQuality),
+    AlignmentFlag(other.AlignmentFlag),
+    CigarData(other.CigarData),
+    MateRefID(other.MateRefID),
+    MatePosition(other.MatePosition),
+    InsertSize(other.InsertSize),
+    SupportData(other.SupportData)
+{
+}
+
+// dtor - performs no explicit cleanup
+BamAlignment::~BamAlignment(void)
+{
+}
+
+// Queries against alignment flags
+// Each query tests a single mask against AlignmentFlag. IsMapped, IsMateMapped
+// and IsPrimaryAlignment report true when their (negative) bit is clear; all
+// other queries report true when their bit is set.
+bool BamAlignment::IsDuplicate(void) const {
+    return (AlignmentFlag & DUPLICATE) != 0;
+}
+
+bool BamAlignment::IsFailedQC(void) const {
+    return (AlignmentFlag & QC_FAILED) != 0;
+}
+
+bool BamAlignment::IsFirstMate(void) const {
+    return (AlignmentFlag & READ_1) != 0;
+}
+
+bool BamAlignment::IsMapped(void) const {
+    return (AlignmentFlag & UNMAPPED) == 0;
+}
+
+bool BamAlignment::IsMateMapped(void) const {
+    return (AlignmentFlag & MATE_UNMAPPED) == 0;
+}
+
+bool BamAlignment::IsMateReverseStrand(void) const {
+    return (AlignmentFlag & MATE_REVERSE) != 0;
+}
+
+bool BamAlignment::IsPaired(void) const {
+    return (AlignmentFlag & PAIRED) != 0;
+}
+
+bool BamAlignment::IsPrimaryAlignment(void) const {
+    return (AlignmentFlag & SECONDARY) == 0;
+}
+
+bool BamAlignment::IsProperPair(void) const {
+    return (AlignmentFlag & PROPER_PAIR) != 0;
+}
+
+bool BamAlignment::IsReverseStrand(void) const {
+    return (AlignmentFlag & REVERSE) != 0;
+}
+
+bool BamAlignment::IsSecondMate(void) const {
+    return (AlignmentFlag & READ_2) != 0;
+}
+
+// Manipulate alignment flags
+// Each setter switches its bit in AlignmentFlag on when 'ok' is true and off
+// otherwise. SetIsMapped, SetIsMateMapped and SetIsPrimaryAlignment forward the
+// negated argument to the setter of the corresponding negative flag.
+void BamAlignment::SetIsDuplicate(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | DUPLICATE) : (AlignmentFlag & ~DUPLICATE);
+}
+
+void BamAlignment::SetIsFailedQC(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | QC_FAILED) : (AlignmentFlag & ~QC_FAILED);
+}
+
+void BamAlignment::SetIsFirstMate(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | READ_1) : (AlignmentFlag & ~READ_1);
+}
+
+void BamAlignment::SetIsMapped(bool ok) {
+    SetIsUnmapped(!ok);
+}
+
+void BamAlignment::SetIsMateMapped(bool ok) {
+    SetIsMateUnmapped(!ok);
+}
+
+void BamAlignment::SetIsMateUnmapped(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | MATE_UNMAPPED) : (AlignmentFlag & ~MATE_UNMAPPED);
+}
+
+void BamAlignment::SetIsMateReverseStrand(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | MATE_REVERSE) : (AlignmentFlag & ~MATE_REVERSE);
+}
+
+void BamAlignment::SetIsPaired(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | PAIRED) : (AlignmentFlag & ~PAIRED);
+}
+
+void BamAlignment::SetIsPrimaryAlignment(bool ok) {
+    SetIsSecondaryAlignment(!ok);
+}
+
+void BamAlignment::SetIsProperPair(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | PROPER_PAIR) : (AlignmentFlag & ~PROPER_PAIR);
+}
+
+void BamAlignment::SetIsReverseStrand(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | REVERSE) : (AlignmentFlag & ~REVERSE);
+}
+
+void BamAlignment::SetIsSecondaryAlignment(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | SECONDARY) : (AlignmentFlag & ~SECONDARY);
+}
+
+void BamAlignment::SetIsSecondMate(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | READ_2) : (AlignmentFlag & ~READ_2);
+}
+
+void BamAlignment::SetIsUnmapped(bool ok) {
+    AlignmentFlag = ok ? (AlignmentFlag | UNMAPPED) : (AlignmentFlag & ~UNMAPPED);
+}
+
+// Calculates the alignment end position from Position and the CIGAR operations.
+//   usePadded : when true, the lengths of insertions ('I') are added as well
+//   zeroBased : when true, the computed end is reduced by one before returning
+// 'M', 'D' and 'N' operations always extend the end position; every other
+// operation type leaves it unchanged.
+int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
+
+    // start at the alignment's own position and walk the CIGAR
+    int endPosition = Position;
+
+    for ( size_t i = 0; i < CigarData.size(); ++i ) {
+        const CigarOp& op = CigarData[i];
+        switch ( op.Type ) {
+            case 'M':
+            case 'D':
+            case 'N':
+                endPosition += op.Length;
+                break;
+            case 'I':
+                if ( usePadded ) endPosition += op.Length;
+                break;
+            default:
+                break;
+        }
+    }
+
+    // adjust for zeroBased, if necessary
+    return ( zeroBased ? endPosition - 1 : endPosition );
+}
+
+// Appends a new string-valued tag to TagData.
+//   tag   : two-character tag name
+//   type  : storage class, must be "Z" or "H"
+//   value : tag contents; stored up to its first NUL and followed by a
+//           terminating NUL
+// Returns false, leaving TagData untouched, when only core data is loaded,
+// when 'tag' or 'type' is malformed, or when the tag already exists (use
+// EditTag to overwrite an existing tag). Returns true after appending.
+bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) {
+
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type != "Z" && type != "H" ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
+
+    // build the complete record (name + type + value + NUL) before touching
+    // TagData; std::string replaces the former variable-length stack array and
+    // strcat(), so no byte of the stored record is ever left uninitialized
+    string newTag;
+    newTag.reserve(tag.size() + type.size() + value.size() + 1);
+    newTag.append(tag);
+    newTag.append(type);
+    newTag.append(value.c_str());   // c_str() stops at the first NUL, as strcat() did
+    newTag.append(1, '\0');
+
+    TagData.append(newTag);
+
+    // return success
+    return true;
+}
+
+// Appends a new integer-valued tag to TagData.
+//   tag   : two-character tag name
+//   type  : integer storage class: "A", "c", "C" (1 byte), "s", "S" (2 bytes)
+//           or "i", "I" (4 bytes)
+//   value : number to store; it is narrowed to the width of 'type'
+// The value is written in host byte order with exactly as many bytes as
+// SkipToNextTag() later steps over for that storage class, so the tag stream
+// stays parseable. Returns false, leaving TagData untouched, when only core
+// data is loaded, when 'tag' or 'type' is malformed or not an integer class,
+// or when the tag already exists (use EditTag instead).
+bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) {
+
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+
+    // encode 'value' with the width that 'type' promises
+    // (assumes unsigned short is 2 bytes wide, like the rest of this code base)
+    char valueBuffer[sizeof(uint32_t)];
+    unsigned int valueLength = 0;
+    switch ( type[0] ) {
+
+        // 1 byte data
+        case 'A':
+        case 'c':
+        case 'C': {
+            const unsigned char narrowValue = (unsigned char)value;
+            valueLength = sizeof(narrowValue);
+            memcpy(valueBuffer, &narrowValue, valueLength);
+            break;
+        }
+
+        // 2 byte data
+        case 's':
+        case 'S': {
+            const unsigned short narrowValue = (unsigned short)value;
+            valueLength = sizeof(narrowValue);
+            memcpy(valueBuffer, &narrowValue, valueLength);
+            break;
+        }
+
+        // 4 byte data
+        case 'i':
+        case 'I':
+            valueLength = sizeof(uint32_t);
+            memcpy(valueBuffer, &value, valueLength);
+            break;
+
+        // float, var-length strings and unknown classes cannot hold an integer
+        default:
+            return false;
+    }
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
+
+    // append header (name + type) followed by the encoded value
+    TagData.append(tag);
+    TagData.append(type);
+    TagData.append(valueBuffer, valueLength);
+
+    // return success
+    return true;
+}
+
+// Signed-integer flavor of AddTag: converts 'value' to uint32_t and forwards
+// it to the unsigned overload, whose result is returned.
+bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) {
+    const uint32_t unsignedValue = static_cast<uint32_t>(value);
+    return AddTag(tag, type, unsignedValue);
+}
+
+// Appends a new tag holding the bytes of a float to TagData.
+//   tag   : two-character tag name
+//   type  : a four-byte storage class: "f", "i" or "I"
+//   value : number to store; its sizeof(float) bytes are appended as-is
+// Storage classes that SkipToNextTag() treats as 1 or 2 bytes wide, as
+// variable-length, or does not know at all are rejected: appending four bytes
+// under them would make the tag stream unparseable.
+// Returns false, leaving TagData untouched, when only core data is loaded,
+// when 'tag' or 'type' is malformed or unsuitable, or when the tag already
+// exists (use EditTag instead).
+bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) {
+
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type != "f" && type != "i" && type != "I" ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // if tag already exists, return false
+    // use EditTag explicitly instead
+    if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
+
+    // take a byte-wise copy of the value
+    char valueBuffer[sizeof(float)];
+    memcpy(valueBuffer, &value, sizeof(float));
+
+    // append header (name + type) followed by the value bytes
+    TagData.append(tag);
+    TagData.append(type);
+    TagData.append(valueBuffer, sizeof(float));
+
+    // return success
+    return true;
+}
+
+// Replaces the value of an existing string tag, or adds the tag if absent.
+//   tag   : two-character tag name
+//   type  : storage class, must be "Z" or "H"
+//   value : new contents; stored up to its first NUL plus a terminating NUL
+// When the tag exists, its record is rewritten in place (same position in
+// TagData) with the given 'type' in its header, so header and payload always
+// agree. When it does not exist, the call is forwarded to AddTag().
+// Returns false, leaving TagData untouched, when only core data is loaded,
+// when 'tag' or 'type' is malformed, or when the end of the existing value
+// cannot be located (unknown storage class or truncated tag data).
+bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) {
+
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type != "Z" && type != "H" ) return false;
+
+    // localize the tag data
+    char* pOriginalTagData = (char*)TagData.data();
+    char* pTagData = pOriginalTagData;
+    const unsigned int originalTagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, attempt AddTag
+    if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
+        return AddTag(tag, type, value);
+
+    // FindTag() stops just past the 3-byte header (name + storage type)
+    const unsigned int headerOffset = numBytesParsed - 3;
+    const char oldStorageType = *(pTagData - 1);
+
+    // step over the old value; give up if its extent cannot be determined
+    if ( !SkipToNextTag(oldStorageType, pTagData, numBytesParsed) ) return false;
+    if ( numBytesParsed > originalTagDataLength ) return false;
+
+    // assemble: [tags before] + [name, type, value, NUL] + [tags after]
+    // a std::string grows as needed, so no fixed-size buffer can be overrun
+    string newTagData;
+    newTagData.reserve(originalTagDataLength + value.size() + 4);
+    newTagData.append(pOriginalTagData, headerOffset);
+    newTagData.append(tag);
+    newTagData.append(type);
+    newTagData.append(value.c_str());
+    newTagData.append(1, '\0');
+    newTagData.append(pTagData, originalTagDataLength - numBytesParsed);
+
+    // save new tag data
+    TagData.swap(newTagData);
+    return true;
+}
+
+// Replaces the value of an existing integer tag, or adds the tag if absent.
+//   tag   : two-character tag name
+//   type  : integer storage class: "A", "c", "C" (1 byte), "s", "S" (2 bytes)
+//           or "i", "I" (4 bytes)
+//   value : number to store; it is narrowed to the width of 'type'
+// When the tag exists, its record is rewritten in place with 'type' in the
+// header and a payload of matching width (host byte order), whatever the width
+// of the old value was. When it does not exist, the call is forwarded to
+// AddTag().
+// Returns false, leaving TagData untouched, when only core data is loaded,
+// when 'tag' or 'type' is malformed or not an integer class, or when the end
+// of the existing value cannot be located (unknown storage class or truncated
+// tag data).
+bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) {
+
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+
+    // encode 'value' with the width that 'type' promises
+    // (assumes unsigned short is 2 bytes wide, like the rest of this code base)
+    char valueBuffer[sizeof(uint32_t)];
+    unsigned int valueLength = 0;
+    switch ( type[0] ) {
+
+        // 1 byte data
+        case 'A':
+        case 'c':
+        case 'C': {
+            const unsigned char narrowValue = (unsigned char)value;
+            valueLength = sizeof(narrowValue);
+            memcpy(valueBuffer, &narrowValue, valueLength);
+            break;
+        }
+
+        // 2 byte data
+        case 's':
+        case 'S': {
+            const unsigned short narrowValue = (unsigned short)value;
+            valueLength = sizeof(narrowValue);
+            memcpy(valueBuffer, &narrowValue, valueLength);
+            break;
+        }
+
+        // 4 byte data
+        case 'i':
+        case 'I':
+            valueLength = sizeof(uint32_t);
+            memcpy(valueBuffer, &value, valueLength);
+            break;
+
+        // float, var-length strings and unknown classes cannot hold an integer
+        default:
+            return false;
+    }
+
+    // localize the tag data
+    char* pOriginalTagData = (char*)TagData.data();
+    char* pTagData = pOriginalTagData;
+    const unsigned int originalTagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, attempt AddTag
+    if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
+        return AddTag(tag, type, value);
+
+    // FindTag() stops just past the 3-byte header (name + storage type)
+    const unsigned int headerOffset = numBytesParsed - 3;
+    const char oldStorageType = *(pTagData - 1);
+
+    // step over the old value; give up if its extent cannot be determined
+    if ( !SkipToNextTag(oldStorageType, pTagData, numBytesParsed) ) return false;
+    if ( numBytesParsed > originalTagDataLength ) return false;
+
+    // assemble: [tags before] + [name, type, value] + [tags after]
+    string newTagData;
+    newTagData.reserve(originalTagDataLength + 3 + valueLength);
+    newTagData.append(pOriginalTagData, headerOffset);
+    newTagData.append(tag);
+    newTagData.append(type);
+    newTagData.append(valueBuffer, valueLength);
+    newTagData.append(pTagData, originalTagDataLength - numBytesParsed);
+
+    // save new tag data
+    TagData.swap(newTagData);
+    return true;
+}
+
+// Signed-integer flavor of EditTag: converts 'value' to uint32_t and forwards
+// it to the unsigned overload, whose result is returned.
+bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) {
+    const uint32_t unsignedValue = static_cast<uint32_t>(value);
+    return EditTag(tag, type, unsignedValue);
+}
+
+// Replaces the value of an existing tag with the bytes of a float, or adds the
+// tag if absent.
+//   tag   : two-character tag name
+//   type  : a four-byte storage class: "f", "i" or "I"
+//   value : number to store; its sizeof(float) bytes are stored as-is
+// When the tag exists, its record is rewritten in place with 'type' in the
+// header, whatever the width of the old value was. When it does not exist,
+// the call is forwarded to AddTag().
+// Returns false, leaving TagData untouched, when only core data is loaded,
+// when 'tag' or 'type' is malformed or not a four-byte class, or when the end
+// of the existing value cannot be located (unknown storage class or truncated
+// tag data).
+bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) {
+
+    if ( SupportData.HasCoreOnly ) return false;
+    if ( tag.size() != 2 || type.size() != 1 ) return false;
+    if ( type != "f" && type != "i" && type != "I" ) return false;
+
+    // localize the tag data
+    char* pOriginalTagData = (char*)TagData.data();
+    char* pTagData = pOriginalTagData;
+    const unsigned int originalTagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, attempt AddTag
+    if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
+        return AddTag(tag, type, value);
+
+    // FindTag() stops just past the 3-byte header (name + storage type)
+    const unsigned int headerOffset = numBytesParsed - 3;
+    const char oldStorageType = *(pTagData - 1);
+
+    // step over the old value; give up if its extent cannot be determined
+    if ( !SkipToNextTag(oldStorageType, pTagData, numBytesParsed) ) return false;
+    if ( numBytesParsed > originalTagDataLength ) return false;
+
+    // take a byte-wise copy of the value
+    char valueBuffer[sizeof(float)];
+    memcpy(valueBuffer, &value, sizeof(float));
+
+    // assemble: [tags before] + [name, type, value] + [tags after]
+    string newTagData;
+    newTagData.reserve(originalTagDataLength + 3 + sizeof(float));
+    newTagData.append(pOriginalTagData, headerOffset);
+    newTagData.append(tag);
+    newTagData.append(type);
+    newTagData.append(valueBuffer, sizeof(float));
+    newTagData.append(pTagData, originalTagDataLength - numBytesParsed);
+
+    // save new tag data
+    TagData.swap(newTagData);
+    return true;
+}
+
+// get "NM" tag data - originally contributed by Aaron Quinlan
+// Looks up the "NM" tag through the integer GetTag() overload; on success the
+// value is stored in 'editDistance'. Returns that lookup's success/fail result.
+bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
+    const bool found = GetTag("NM", editDistance);
+    return found;
+}
+
+// get "RG" tag data
+// Looks up the "RG" tag through the string GetTag() overload; on success the
+// value is stored in 'readGroup'. Returns that lookup's success/fail result.
+bool BamAlignment::GetReadGroup(string& readGroup) const {
+    const bool found = GetTag("RG", readGroup);
+    return found;
+}
+
+// Retrieves the value of 'tag' as a string.
+// On success 'destination' receives the bytes from the start of the tag's
+// value up to (not including) the first NUL, and true is returned. Returns
+// false, leaving 'destination' untouched, when only core data is loaded, when
+// there is no tag data, or when the tag is not found.
+// NOTE(review): the tag's storage class is not inspected here, so this appears
+// to be meant for "Z"/"H" tags only - confirm with callers.
+bool BamAlignment::GetTag(const string& tag, string& destination) const {
+
+    // nothing to search without tag data
+    if ( SupportData.HasCoreOnly || TagData.empty() )
+        return false;
+
+    // set up the scan over the raw tag bytes
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, return failure
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // pTagData now sits on the first byte of the value
+    const unsigned int valueLength = strlen(pTagData);
+    destination.assign(pTagData, valueLength);
+    return true;
+}
+
+// Retrieves the value of an integer-typed 'tag'.
+// The tag's storage class decides how many bytes are read: 1 for 'A', 'c',
+// 'C'; 2 for 's', 'S'; 4 for 'i', 'I'. 'destination' is zeroed and those bytes
+// are copied onto its leading bytes. Float and string classes ('f', 'Z', 'H')
+// and unknown classes are rejected with a message on stderr.
+// Returns false, leaving 'destination' untouched, when only core data is
+// loaded, when there is no tag data, when the tag is not found, or when its
+// storage class is rejected.
+bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const {
+
+    // nothing to search without tag data
+    if ( SupportData.HasCoreOnly || TagData.empty() )
+        return false;
+
+    // set up the scan over the raw tag bytes
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, return failure
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // the storage class is the byte right before the value
+    const char storageType = *(pTagData - 1);
+
+    int byteCount = 0;
+    if ( storageType == 'A' || storageType == 'c' || storageType == 'C' ) {
+        byteCount = 1;
+    }
+    else if ( storageType == 's' || storageType == 'S' ) {
+        byteCount = 2;
+    }
+    else if ( storageType == 'i' || storageType == 'I' ) {
+        byteCount = 4;
+    }
+    else if ( storageType == 'f' || storageType == 'Z' || storageType == 'H' ) {
+        // unsupported type for integer destination (float or var-length strings)
+        fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", storageType);
+        return false;
+    }
+    else {
+        // unknown tag type
+        fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);
+        return false;
+    }
+
+    // store in destination
+    destination = 0;
+    memcpy(&destination, pTagData, byteCount);
+    return true;
+}
+
+// Retrieves the value of an integer-typed 'tag' as a signed integer.
+// The raw bytes are fetched through the uint32_t overload, which zero-fills
+// the bytes it does not read. For the signed narrow storage classes 'c'
+// (1 byte) and 's' (2 bytes) the value is then sign-extended, so that e.g. a
+// stored -1 is reported as -1 rather than 255 or 65535. All other storage
+// classes keep the bit pattern delivered by the unsigned overload.
+// Returns false, leaving 'destination' untouched, whenever the unsigned
+// overload fails.
+bool BamAlignment::GetTag(const string& tag, int32_t& destination) const {
+
+    // fetch the zero-extended raw value
+    uint32_t rawValue = 0;
+    if ( !GetTag(tag, rawValue) ) return false;
+
+    // default: hand back exactly the same bits
+    memcpy(&destination, &rawValue, sizeof(destination));
+
+    // sign-extend narrow signed values; the upper-bound checks skip the
+    // correction if the raw value does not fit the narrow width (as would be
+    // the case on a host where the copied bytes are not the low-order ones)
+    char storageType = 0;
+    if ( GetTagType(tag, storageType) ) {
+        if ( storageType == 'c' && rawValue > 0x7F && rawValue <= 0xFF )
+            destination = (int32_t)rawValue - 0x100;
+        else if ( storageType == 's' && rawValue > 0x7FFF && rawValue <= 0xFFFF )
+            destination = (int32_t)rawValue - 0x10000;
+    }
+    return true;
+}
+
+// Retrieves the value of 'tag' into a float.
+// The tag's storage class decides how many bytes are read: 1 for 'A', 'c',
+// 'C'; 2 for 's', 'S'; 4 for 'f', 'i', 'I'. 'destination' is set to 0.0 and
+// those bytes are copied onto its leading bytes.
+// NOTE(review): integer-typed tags are therefore bit-copied, not converted
+// numerically - only 'f' tags yield a meaningful number. Confirm whether any
+// caller relies on the integer cases.
+// String classes ('Z', 'H') and unknown classes are rejected with a message on
+// stderr. Returns false, leaving 'destination' untouched, when only core data
+// is loaded, when there is no tag data, when the tag is not found, or when its
+// storage class is rejected.
+bool BamAlignment::GetTag(const string& tag, float& destination) const {
+
+    // make sure tag data exists
+    if ( SupportData.HasCoreOnly || TagData.empty() )
+        return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, return failure
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // determine data byte-length from the storage class preceding the value
+    const char type = *(pTagData - 1);
+    int destinationLength = 0;
+    switch(type) {
+
+        // 1 byte data
+        case 'A':
+        case 'c':
+        case 'C':
+            destinationLength = 1;
+            break;
+
+        // 2 byte data
+        case 's':
+        case 'S':
+            destinationLength = 2;
+            break;
+
+        // 4 byte data
+        case 'f':
+        case 'i':
+        case 'I':
+            destinationLength = 4;
+            break;
+
+        // unsupported type (var-length strings)
+        case 'Z':
+        case 'H':
+            // message now names the float destination this overload writes to
+            fprintf(stderr, "ERROR: Cannot store tag of type %c in float destination\n", type);
+            return false;
+
+        // unknown tag type
+        default:
+            fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
+            return false;
+    }
+
+    // store in destination
+    destination = 0.0;
+    memcpy(&destination, pTagData, destinationLength);
+    return true;
+}
+
+// Reports the storage class character of 'tag'.
+// When the tag is found, 'type' receives the byte that precedes its value.
+// True is returned only if that byte is one of the known storage classes
+// (A, c, C, s, S, f, i, I, Z, H); otherwise an error is printed to stderr and
+// false is returned, with 'type' still holding the byte that was read.
+// Returns false, leaving 'type' untouched, when only core data is loaded, when
+// there is no tag data, or when the tag is not found.
+bool BamAlignment::GetTagType(const string& tag, char& type) const {
+
+    // nothing to search without tag data
+    if ( SupportData.HasCoreOnly || TagData.empty() )
+        return false;
+
+    // set up the scan over the raw tag bytes
+    char* pTagData = (char*)TagData.data();
+    const unsigned int tagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, return failure
+    if ( !FindTag(tag, pTagData, tagDataLength, numBytesParsed) )
+        return false;
+
+    // retrieve tag type code
+    type = *(pTagData - 1);
+
+    // validate that type is a proper BAM tag type
+    // (length excludes the literal's terminator, so '\0' is never accepted)
+    static const char knownTypes[] = "AcCsSfiIZH";
+    if ( memchr(knownTypes, type, sizeof(knownTypes) - 1) != NULL )
+        return true;
+
+    // unknown tag type
+    fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
+    return false;
+}
+
+// Removes 'tag' (header and value) from TagData.
+// Returns true when the tag was found and erased. Returns false, leaving
+// TagData untouched, when only core data is loaded (alignments fetched using
+// BamReader::GetNextAlignmentCore()), when there is no tag data, when the tag
+// is not found, or when the end of its value cannot be located (unknown
+// storage class or truncated tag data).
+bool BamAlignment::RemoveTag(const string& tag) {
+
+    // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
+    // also, return false if no data present to remove
+    if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
+
+    // localize the tag data
+    char* pTagData = (char*)TagData.data();
+    const unsigned int originalTagDataLength = TagData.size();
+    unsigned int numBytesParsed = 0;
+
+    // tag not found, no removal - return failure
+    if ( !FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) )
+        return false;
+
+    // FindTag() stops just past the 3-byte header (name + storage type)
+    const unsigned int tagOffset = numBytesParsed - 3;
+    const char storageType = *(pTagData - 1);
+
+    // step over the value; without a known extent nothing can be removed safely
+    if ( !SkipToNextTag(storageType, pTagData, numBytesParsed) ) return false;
+    if ( numBytesParsed > originalTagDataLength ) return false;
+
+    // cut the record out in place (pTagData is not used past this point)
+    TagData.erase(tagOffset, numBytesParsed - tagOffset);
+    return true;
+}
+
+// Scans raw tag data for 'tag'.
+//   tag            : tag name; its first two characters are compared
+//   pTagData       : in: current scan position; out (on success): first byte
+//                    of the matching tag's value
+//   tagDataLength  : total number of bytes of tag data
+//   numBytesParsed : in: bytes already consumed; out (on success): offset of
+//                    the matching tag's value
+// Every record starts with a 3-byte header (2-character name + storage class).
+// Returns true on a match. Returns false when the data is exhausted, when
+// fewer than 3 bytes remain for a header, when a storage class is NUL or
+// unknown, or when the byte following a skipped value is NUL. No byte at or
+// beyond 'tagDataLength' is inspected by this function itself.
+bool BamAlignment::FindTag(const string& tag,
+                           char* &pTagData,
+                           const unsigned int& tagDataLength,
+                           unsigned int& numBytesParsed)
+{
+    const unsigned int headerLength = 3;
+
+    while ( numBytesParsed < tagDataLength ) {
+
+        // an incomplete header cannot describe a tag - stop before reading it
+        if ( tagDataLength - numBytesParsed < headerLength ) return false;
+
+        const char* pTagType        = pTagData;
+        const char* pTagStorageType = pTagData + 2;
+        pTagData       += headerLength;
+        numBytesParsed += headerLength;
+
+        // check the current tag, return true on match
+        if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
+            return true;
+
+        // get the storage class and find the next tag
+        if ( *pTagStorageType == '\0' ) return false;
+        if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
+
+        // do not peek at the next byte unless it is still inside the data
+        if ( numBytesParsed >= tagDataLength ) return false;
+        if ( *pTagData == '\0' ) return false;
+    }
+
+    // checked all tags, none match
+    return false;
+}
+
+// Advances 'pTagData' and 'numBytesParsed' past one tag value.
+//   storageType : storage class of the value at 'pTagData'
+// The step is 1 byte for 'A', 'c', 'C'; 2 bytes for 's', 'S'; 4 bytes for
+// 'f', 'i', 'I'; and, for 'Z' and 'H', the length of the NUL-terminated string
+// at 'pTagData' plus one for its terminator.
+// Returns true after advancing. For any other storage class an error is
+// printed to stderr, nothing is advanced and false is returned.
+bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
+
+    size_t step = 0;
+
+    if ( storageType == 'A' || storageType == 'c' || storageType == 'C' ) {
+        step = 1;
+    }
+    else if ( storageType == 's' || storageType == 'S' ) {
+        step = 2;
+    }
+    else if ( storageType == 'f' || storageType == 'i' || storageType == 'I' ) {
+        step = 4;
+    }
+    else if ( storageType == 'Z' || storageType == 'H' ) {
+        // string body plus its null-terminator
+        step = strlen(pTagData) + 1;
+    }
+    else {
+        // error case
+        fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);
+        return false;
+    }
+
+    numBytesParsed += (unsigned int)step;
+    pTagData       += step;
+
+    // return success
+    return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamAlignment.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,203 @@
+// ***************************************************************************
+// BamAlignment.h (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 13 December 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the BamAlignment data structure
+// ***************************************************************************
+
+#ifndef BAMALIGNMENT_H
+#define BAMALIGNMENT_H
+
+#include <api_global.h>
+#include <BamAux.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+// forward declare BamAlignment's friend classes
+namespace Internal {
+    class BamReaderPrivate;
+    class BamWriterPrivate;
+} // namespace Internal
+
+// BamAlignment data structure - one alignment record (read name, bases, qualities, tag data, position, flags)
+// explicitly labeled as 'struct' to indicate that (most of) its fields are public
+struct API_EXPORT BamAlignment {
+
+    // constructors & destructor
+    public:
+        BamAlignment(void);
+        BamAlignment(const BamAlignment& other);
+        ~BamAlignment(void);
+
+    // Queries against alignment flags
+    public:        
+        bool IsDuplicate(void) const;           // Returns true if this read is a PCR duplicate       
+        bool IsFailedQC(void) const;            // Returns true if this read failed quality control      
+        bool IsFirstMate(void) const;           // Returns true if alignment is first mate on read        
+        bool IsMapped(void) const;              // Returns true if alignment is mapped        
+        bool IsMateMapped(void) const;          // Returns true if alignment's mate is mapped        
+        bool IsMateReverseStrand(void) const;   // Returns true if alignment's mate mapped to reverse strand        
+        bool IsPaired(void) const;              // Returns true if alignment part of paired-end read        
+        bool IsPrimaryAlignment(void) const;    // Returns true if reported position is primary alignment       
+        bool IsProperPair(void) const;          // Returns true if alignment is part of read that satisfied paired-end resolution     
+        bool IsReverseStrand(void) const;       // Returns true if alignment mapped to reverse strand
+        bool IsSecondMate(void) const;          // Returns true if alignment is second mate on read
+
+    // Manipulate alignment flags
+    public:        
+        void SetIsDuplicate(bool ok);           // Sets "PCR duplicate" flag        
+        void SetIsFailedQC(bool ok);            // Sets "failed quality control" flag        
+        void SetIsFirstMate(bool ok);           // Sets "alignment is first mate" flag
+        void SetIsMapped(bool ok);              // Sets "alignment is mapped" flag
+        void SetIsMateMapped(bool ok);          // Sets "alignment's mate is mapped" flag
+        void SetIsMateReverseStrand(bool ok);   // Sets "alignment's mate mapped to reverse strand" flag        
+        void SetIsPaired(bool ok);              // Sets "alignment part of paired-end read" flag
+        void SetIsPrimaryAlignment(bool ok);    // Sets "position is primary alignment" flag
+        void SetIsProperPair(bool ok);          // Sets "alignment is part of read that satisfied paired-end resolution" flag        
+        void SetIsReverseStrand(bool ok);       // Sets "alignment mapped to reverse strand" flag        
+        void SetIsSecondMate(bool ok);          // Sets "alignment is second mate on read" flag
+
+        // legacy methods (deprecated, but available)
+        void SetIsMateUnmapped(bool ok);        // Complement of IsMateMapped() flag
+        void SetIsSecondaryAlignment(bool ok);  // Complement of IsPrimaryAlignment() flag
+        void SetIsUnmapped(bool ok);            // Complement of IsMapped() flag
+
+    // Tag data access methods
+    public:
+        // -------------------------------------------------------------------------------------
+        // N.B. - The following tag access methods may not be used on BamAlignments fetched
+        // using BamReader::GetNextAlignmentCore().  Attempting to use them will not result in an
+        // error message (to keep output clean) but will ALWAYS return false.  Only user-created
+        // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here.
+
+        // add tag data (create new TAG entry with TYPE and VALUE)
+        // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
+        // returns true if new data added, false if error or TAG already exists
+        // N.B. - will NOT modify existing tag. Use EditTag() instead
+        // @tag   - two character tag name
+        // @type  - single character tag type (see SAM/BAM spec for details)
+        // @value - value to associate with tag
+        bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H
+        bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value);    // type must be A or i
+        bool AddTag(const std::string& tag, const std::string& type, const int32_t& value);     // type must be A or i
+        bool AddTag(const std::string& tag, const std::string& type, const float& value);       // type must be A, i, or f
+        
+        // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present)
+        // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
+        // returns true if edit was successful, false if error
+        // @tag   - two character tag name
+        // @type  - single character tag type (see SAM/BAM spec for details)
+        // @value - new value for tag
+        bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H
+        bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value);    // type must be A or i
+        bool EditTag(const std::string& tag, const std::string& type, const int32_t& value);     // type must be A or i
+        bool EditTag(const std::string& tag, const std::string& type, const float& value);       // type must be A, i, or f
+
+        // specific tag data access methods - these only remain for legacy support
+        // returns whether specific tag could be retrieved
+        bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (equivalent to GetTag("NM", editDistance))
+        bool GetReadGroup(std::string& readGroup) const;    // get "RG" tag data (equivalent to GetTag("RG", readGroup)) 
+        
+        // generic tag data access methods 
+        // returns whether tag is found & tag type is compatible with DESTINATION
+        // @tag - two character tag name
+        // @destination - if found, tag value is stored here
+        bool GetTag(const std::string& tag, std::string& destination) const;    // access variable-length char or hex strings 
+        bool GetTag(const std::string& tag, uint32_t& destination) const;       // access unsigned integer data
+        bool GetTag(const std::string& tag, int32_t& destination) const;        // access signed integer data
+        bool GetTag(const std::string& tag, float& destination) const;          // access floating point data
+        
+        // retrieve the tag type code for TAG
+        // returns true if tag could be found and type determined
+        bool GetTagType(const std::string& tag, char& type) const;
+        
+        // remove tag data
+        // returns true if removal was successful, false if error
+        // N.B. - returns false if TAG does not exist (no removal can occur)
+        // @tag - two character tag name
+        bool RemoveTag(const std::string& tag);
+
+    // Additional data access methods
+    public:
+        // calculates & returns alignment end position, based on starting position and CIGAR operations
+        // @usePadded - if true, counts inserted bases. Default is false, so that alignment end position matches the last base's position in reference
+        // @zeroBased - if true, returns 0-based coordinate; else returns 1-based. Setting this to false is useful when using BAM data along with other, half-open formats.
+        int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;  
+
+    // 'internal' utility methods (static helpers that walk a raw tag-data buffer through a moving cursor)
+    private:
+        static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed);
+        static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);
+
+    // Data members
+    public:
+        std::string Name;              // Read name
+        int32_t     Length;            // Query length
+        std::string QueryBases;        // 'Original' sequence (as reported from sequencing machine)
+        std::string AlignedBases;      // 'Aligned' sequence (includes any indels, padding, clipping)
+        std::string Qualities;         // FASTQ qualities (ASCII characters, not numeric values)
+        std::string TagData;           // Tag data (accessor methods will pull the requested information out)
+        int32_t     RefID;             // ID number for reference sequence
+        int32_t     Position;          // Position (0-based) where alignment starts
+        uint16_t    Bin;               // Bin in BAM file where this alignment resides
+        uint16_t    MapQuality;        // Mapping quality score
+        uint32_t    AlignmentFlag;     // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate 
+        std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
+        int32_t     MateRefID;         // ID number for reference sequence where alignment's mate was aligned
+        int32_t     MatePosition;      // Position (0-based) where alignment's mate starts
+        int32_t     InsertSize;        // Mate-pair insert size
+          
+    // Internal data, inaccessible to client code
+    // but available to BamReaderPrivate & BamWriterPrivate (declared as friends below)
+    private:
+        struct BamAlignmentSupportData {
+      
+            // data members
+            std::string AllCharData;
+            uint32_t    BlockLength;
+            uint32_t    NumCigarOperations;
+            uint32_t    QueryNameLength;
+            uint32_t    QuerySequenceLength;
+            bool        HasCoreOnly;
+            
+            // constructor - zeroes every counter, clears the 'core only' marker (AllCharData starts empty)
+            BamAlignmentSupportData(void)
+                : BlockLength(0)
+                , NumCigarOperations(0)
+                , QueryNameLength(0)
+                , QuerySequenceLength(0)
+                , HasCoreOnly(false)
+            { }
+        };
+	BamAlignmentSupportData SupportData;
+	friend class Internal::BamReaderPrivate;
+	friend class Internal::BamWriterPrivate;
+        
+    // Alignment flag query constants (each value is a distinct power of two)
+    // Use the get/set methods above instead
+    private:
+        enum { PAIRED        = 1
+             , PROPER_PAIR   = 2
+             , UNMAPPED      = 4
+             , MATE_UNMAPPED = 8
+             , REVERSE       = 16
+             , MATE_REVERSE  = 32
+             , READ_1        = 64
+             , READ_2        = 128
+             , SECONDARY     = 256
+             , QC_FAILED     = 512
+             , DUPLICATE     = 1024 
+             };
+};
+
+// convenience typedef(s)
+typedef std::vector<BamAlignment> BamAlignmentVector;
+
+} // namespace BamTools
+
+#endif // BAMALIGNMENT_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamAux.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,227 @@
+// ***************************************************************************
+// BamAux.h (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic constants, data structures, utilities etc. 
+// used throughout the API for handling BAM files
+// ***************************************************************************
+
+#ifndef BAMAUX_H
+#define BAMAUX_H
+
+#include <api_global.h>
+
+#include <fstream> 
+#include <iostream>
+#include <string>
+#include <vector>
+
+// Platform-specific large-file support
+#ifndef BAMTOOLS_LFS
+#define BAMTOOLS_LFS
+    #ifdef WIN32
+        #define ftell64(a)     _ftelli64(a)
+        #define fseek64(a,b,c) _fseeki64(a,b,c)
+    #else
+        #define ftell64(a)     ftello(a)
+        #define fseek64(a,b,c) fseeko(a,b,c)
+    #endif
+#endif // BAMTOOLS_LFS
+
+// Platform-specific type definitions
+#ifndef BAMTOOLS_TYPES
+#define BAMTOOLS_TYPES
+    #ifdef _MSC_VER
+        typedef char                 int8_t;
+        typedef unsigned char       uint8_t;
+        typedef short               int16_t;
+        typedef unsigned short     uint16_t;
+        typedef int                 int32_t;
+        typedef unsigned int       uint32_t;
+        typedef long long           int64_t;
+        typedef unsigned long long uint64_t;
+    #else
+        #include <stdint.h>
+    #endif
+#endif // BAMTOOLS_TYPES
+
+namespace BamTools {
+
+// ----------------------------------------------------------------
+// ----------------------------------------------------------------
+// BAM constants
+
+const int BAM_CMATCH      = 0;   // CIGAR operation codes 0..6 follow
+const int BAM_CINS        = 1;   //   (presumably in the same order as the 'MIDNSHP' type
+const int BAM_CDEL        = 2;   //    characters noted on CigarOp::Type - TODO confirm)
+const int BAM_CREF_SKIP   = 3;
+const int BAM_CSOFT_CLIP  = 4;
+const int BAM_CHARD_CLIP  = 5;
+const int BAM_CPAD        = 6;
+const int BAM_CIGAR_SHIFT = 4;   // number of low-order bits covered by BAM_CIGAR_MASK
+const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);   // evaluates to 0xF (low 4 bits set)
+const int BAM_CORE_SIZE   = 32;  // NOTE(review): presumably the byte size of the fixed alignment core - confirm against reader
+const int BT_SIZEOF_INT   = 4;   // NOTE(review): presumably the byte width of integers stored in BAM/index files - confirm
+
+// ----------------------------------------------------------------
+// ----------------------------------------------------------------
+// Data structs & typedefs
+
+// A single CIGAR operation: an operation code paired with its length
+struct API_EXPORT CigarOp {
+
+    // operation type (one of MIDNSHP)
+    char Type;
+    // operation length (number of bases)
+    uint32_t Length;
+
+    // builds an operation from its type and length;
+    // with no arguments the result has type '\0' and length 0
+    CigarOp(const char type = '\0', const uint32_t length = 0)
+        : Type(type), Length(length)
+    { }
+};
+
+// Describes one reference sequence: its name, length and whether alignments map to it
+struct API_EXPORT RefData {
+
+    // name of reference sequence (left empty by the constructor)
+    std::string RefName;
+    // length of reference sequence
+    int32_t RefLength;
+    // true if BAM file contains alignments mapped to reference sequence
+    bool RefHasAlignments;
+
+    // builds an entry from a length and a 'has alignments' marker (defaults: 0, false)
+    RefData(const int32_t& length = 0, bool ok = false)
+        : RefName(), RefLength(length), RefHasAlignments(ok)
+    { }
+};
+typedef std::vector<RefData> RefVector;
+
+// General (sequential) genome region, described by a left and a right (reference ID, position) bound
+// N.B. - a bound counts as 'specified' only when both its reference ID and its position are non-negative
+struct API_EXPORT BamRegion {
+
+    // left bound
+    int LeftRefID;
+    int LeftPosition;
+
+    // right bound
+    int RightRefID;
+    int RightPosition;
+
+    // constructor - every argument defaults to -1, i.e. 'not specified'
+    BamRegion(const int& leftID   = -1,
+              const int& leftPos  = -1,
+              const int& rightID  = -1,
+              const int& rightPos = -1)
+        : LeftRefID(leftID), LeftPosition(leftPos)
+        , RightRefID(rightID), RightPosition(rightPos)
+    { }
+
+    // copy constructor - member-wise copy of both bounds
+    BamRegion(const BamRegion& other)
+        : LeftRefID(other.LeftRefID), LeftPosition(other.LeftPosition)
+        , RightRefID(other.RightRefID), RightPosition(other.RightPosition)
+    { }
+
+    // resets all four fields to -1
+    void clear(void) { LeftRefID = LeftPosition = RightRefID = RightPosition = -1; }
+    bool isLeftBoundSpecified(void) const { return !( LeftRefID < 0 || LeftPosition < 0 ); }
+    bool isRightBoundSpecified(void) const { return !( RightRefID < 0 || RightPosition < 0 ); }
+    // true when neither bound is specified
+    bool isNull(void) const { return !( isLeftBoundSpecified() || isRightBoundSpecified() ); }
+};
+
+// ----------------------------------------------------------------
+// ----------------------------------------------------------------
+// General utilities 
+
+// returns true if system is big endian (the first byte in memory of the 16-bit value 0x0001 is zero)
+inline bool SystemIsBigEndian(void) {
+    const uint16_t probe = 0x0001;
+    return ( *reinterpret_cast<const char*>(&probe) == 0 );
+}
+
+// swaps endianness of 16-bit value 'in place' (signed overload shuffles the unsigned bit pattern, so '>>' cannot sign-extend a negative value)
+inline void SwapEndian_16(int16_t& x) {
+    const uint16_t bits = static_cast<uint16_t>(x);
+    x = static_cast<int16_t>( static_cast<uint16_t>( (bits >> 8) | (bits << 8) ) );
+}
+inline void SwapEndian_16(uint16_t& x) {
+    x = static_cast<uint16_t>( (x >> 8) | (x << 8) );
+}
+
+// swaps endianness of 32-bit value 'in-place'
+// N.B. - signed overload shuffles the unsigned bit pattern ('>>' on a negative value would sign-extend into the result)
+inline void SwapEndian_32(int32_t& x) {
+    const uint32_t bits = static_cast<uint32_t>(x);
+    x = static_cast<int32_t>( (bits >> 24) |
+                             ((bits << 8) & 0x00FF0000) |
+                             ((bits >> 8) & 0x0000FF00) |
+                              (bits << 24) );
+}
+
+inline void SwapEndian_32(uint32_t& x) {
+    x = ( (x >> 24) |
+         ((x << 8) & 0x00FF0000) |
+         ((x >> 8) & 0x0000FF00) |
+          (x << 24) );
+}
+
+// swaps endianness of 64-bit value 'in-place'
+// N.B. - signed overload shuffles the unsigned bit pattern ('>>' on a negative value would sign-extend into the result)
+inline void SwapEndian_64(int64_t& x) {
+    const uint64_t bits = static_cast<uint64_t>(x);
+    x = static_cast<int64_t>( (bits >> 56) |
+         ((bits << 40) & 0x00FF000000000000ull) |
+         ((bits << 24) & 0x0000FF0000000000ull) |
+         ((bits << 8)  & 0x000000FF00000000ull) |
+         ((bits >> 8)  & 0x00000000FF000000ull) |
+         ((bits >> 24) & 0x0000000000FF0000ull) |
+         ((bits >> 40) & 0x000000000000FF00ull) |
+          (bits << 56) );
+}
+
+inline void SwapEndian_64(uint64_t& x) {
+    x = ( (x >> 56) |
+         ((x << 40) & 0x00FF000000000000ull) |
+         ((x << 24) & 0x0000FF0000000000ull) |
+         ((x << 8)  & 0x000000FF00000000ull) |
+         ((x >> 8)  & 0x00000000FF000000ull) |
+         ((x >> 24) & 0x0000000000FF0000ull) |
+         ((x >> 40) & 0x000000000000FF00ull) |
+          (x << 56) );
+}
+
+// swaps endianness of 'next 2 bytes' in a char buffer (in-place)
+// NOTE(review): views the buffer as a uint16_t, so 'data' presumably must be suitably aligned - confirm at call sites
+inline void SwapEndian_16p(char* data) {
+    SwapEndian_16( *reinterpret_cast<uint16_t*>(data) );
+}
+
+// swaps endianness of 'next 4 bytes' in a char buffer (in-place)
+// NOTE(review): views the buffer as a uint32_t, so 'data' presumably must be suitably aligned - confirm at call sites
+inline void SwapEndian_32p(char* data) {
+    SwapEndian_32( *reinterpret_cast<uint32_t*>(data) );
+}
+
+// swaps endianness of 'next 8 bytes' in a char buffer (in-place)
+// NOTE(review): views the buffer as a uint64_t, so 'data' presumably must be suitably aligned - confirm at call sites
+inline void SwapEndian_64p(char* data) {
+    SwapEndian_64( *reinterpret_cast<uint64_t*>(data) );
+}
+
+// returns whether file exists (i.e. an input stream on it can be opened OK)
+inline bool FileExists(const std::string& filename) {
+    std::ifstream probe(filename.c_str(), std::ios_base::in);
+    return ( probe ? true : false );
+}
+
+} // namespace BamTools
+
+#endif // BAMAUX_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamIndex.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,230 @@
+// ***************************************************************************
+// BamIndex.cpp (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 22 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides index functionality - both for the default (standardized) BAM 
+// index format (.bai) as well as a BamTools-specific (nonstandard) index 
+// format (.bti).
+// ***************************************************************************
+
+#include <BamIndex.h>
+#include <BamReader.h>
+#include <BGZF.h>
+#include <BamStandardIndex_p.h>
+#include <BamToolsIndex_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+#include <iostream>
+#include <map>
+using namespace std;
+
+// --------------------------------------------------
+// BamIndex factory methods
+
+// returns index based on BAM filename 'stub'
+// checks first for preferred type, returns that type if found
+// (if not found, attempts to load other type(s), returns 0 if NONE found)
+//
+// ** default preferred type is BamToolsIndex ** use this anytime it exists
+BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
+				    BamTools::BgzfData* bgzf,
+				    BamTools::BamReader* reader,
+				    const BamIndex::PreferredIndexType& type)
+{
+    // probe for a BamTools index (<bam>.bti); take it right away if it is the preferred type
+    const bool haveBti = BamTools::FileExists(bamFilename + ".bti");
+    if ( haveBti && (type == BamIndex::BAMTOOLS) )
+        return new BamToolsIndex(bgzf, reader);
+
+    // probe for a standard index (<bam>.bai); take it right away if it is the preferred type
+    const bool haveBai = BamTools::FileExists(bamFilename + ".bai");
+    if ( haveBai && (type == BamIndex::STANDARD) )
+        return new BamStandardIndex(bgzf, reader);
+
+    // preferred type could not be found: fall back to whichever index file does exist,
+    // trying the BamTools format before the standard one
+    // (0 signals that no index file was found at all)
+    BamIndex* fallback = 0;
+    if ( haveBti )
+        fallback = new BamToolsIndex(bgzf, reader);
+    else if ( haveBai )
+        fallback = new BamStandardIndex(bgzf, reader);
+
+    return fallback;
+}
+
+// returns index based on explicitly named index file (or 0 if not found / extension not supported)
+BamIndex* BamIndex::FromIndexFilename(const std::string&   indexFilename,
+				      BamTools::BgzfData*  bgzf,
+				      BamTools::BamReader* reader)
+{
+    // see if specified file exists
+    if ( !BamTools::FileExists(indexFilename) ) return 0;
+
+    const std::string bamtoolsIndexExtension(".bti");
+    const std::string standardIndexExtension(".bai");
+    const std::string::size_type nameLength = indexFilename.length();
+    // the extension must be the *suffix* of the name: compare() anchors the match at the end (find() stopped at the
+    // first occurrence), and the length guard keeps a too-short name from wrapping the unsigned offset around to npos
+    if ( nameLength >= bamtoolsIndexExtension.length() &&
+         indexFilename.compare(nameLength - bamtoolsIndexExtension.length(), bamtoolsIndexExtension.length(), bamtoolsIndexExtension) == 0 )
+        return new BamToolsIndex(bgzf, reader);
+    if ( nameLength >= standardIndexExtension.length() &&
+         indexFilename.compare(nameLength - standardIndexExtension.length(), standardIndexExtension.length(), standardIndexExtension) == 0 )
+        return new BamStandardIndex(bgzf, reader);
+
+    // otherwise, unsupported file type
+    return 0;
+}
+
+// -------------------------------
+// BamIndex implementation
+
+// ctor - stores the BGZF/reader pointers, starts in limited-caching mode with no index stream
+BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader)
+    : m_BGZF(bgzf), m_reader(reader)
+    , m_cacheMode(BamIndex::LimitedIndexCaching), m_indexStream(0)
+{
+    // reference data is only copied from a reader that was supplied and reports itself open
+    const bool readerUsable = ( m_reader != 0 ) && m_reader->IsOpen();
+    if ( !readerUsable ) return;
+    m_references = m_reader->GetReferenceData();
+}
+
+// dtor - closes the index file stream if one is still open
+BamIndex::~BamIndex(void) {
+    if ( !IsOpen() ) return;
+    fclose(m_indexStream);
+}
+
+// return true if FILE* is open (i.e. the index stream pointer is non-null)
+bool BamIndex::IsOpen(void) const {
+    return ( m_indexStream ? true : false );
+}
+
+// loads existing data from file into memory
+// @filename - path of the index file to read
+// returns true if the file was opened and its header & reference data loaded OK;
+// on a load failure the index file is closed again and false is returned
+bool BamIndex::Load(const string& filename)  {
+
+    // open index file, abort on error
+    if ( !OpenIndexFile(filename, "rb") ) {
+        fprintf(stderr, "ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
+        return false;
+    }
+
+    // check magic number, then load reference data (but only keep in memory if full caching requested)
+    if ( !LoadHeader() || !LoadAllReferences( m_cacheMode == BamIndex::FullIndexCaching ) ) {
+        // close the file and reset the handle: IsOpen() and the destructor key off
+        // m_indexStream, so a stale pointer would be handed to fclose() a second time
+        fclose(m_indexStream);
+        m_indexStream = 0;
+        return false;
+    }
+
+    // update index cache based on selected mode
+    UpdateCache();
+
+    // return success
+    return true;
+
+}
+
+// opens index file for reading/writing (per the fopen-style 'mode' string), return true if opened OK
+bool BamIndex::OpenIndexFile(const string& filename, const string& mode) {
+    m_indexStream = fopen(filename.c_str(), mode.c_str());
+    return IsOpen();
+}
+
+// rewind index file to beginning of index data (DataBeginOffset()), return true if rewound OK
+bool BamIndex::Rewind(void) {
+    return !fseek64(m_indexStream, DataBeginOffset(), SEEK_SET);
+}
+
+// change the index caching behavior
+// (a request for the mode already in effect is a no-op and leaves the cache untouched)
+void BamIndex::SetCacheMode(const BamIndexCacheMode mode) {
+    if ( mode == m_cacheMode ) return;
+    m_cacheMode = mode;
+    UpdateCache();
+}
+
+// updates in-memory cache of index data, depending on current cache mode
+void BamIndex::UpdateCache(void) {
+
+    // nothing to do while no index file is open
+    if ( !IsOpen() ) return;
+
+    // full caching: go back to the beginning of the index data and load (and keep) all references
+    if ( m_cacheMode == BamIndex::FullIndexCaching ) {
+        Rewind();
+        LoadAllReferences(true);
+    }
+
+    // limited caching: keep data for the first reference only
+    else if ( m_cacheMode == BamIndex::LimitedIndexCaching ) {
+
+        // everything is already in memory, so just trim it down
+        if ( HasFullDataCache() ) {
+            KeepOnlyFirstReferenceOffsets();
+            return;
+        }
+
+        // otherwise drop whatever is cached and read the first reference from file
+        ClearAllData();
+        SkipToFirstReference();
+        LoadFirstReference(true);
+    }
+
+    // no caching: discard all in-memory index data (any other mode value is ignored)
+    else if ( m_cacheMode == BamIndex::NoIndexCaching )
+        ClearAllData();
+}
+
+// writes in-memory index data out to file
+// @bamFilename - original BAM filename; Extension() is appended to form the index filename
+// returns false if the index file cannot be opened, written, or re-opened for reading afterwards
+bool BamIndex::Write(const string& bamFilename) {
+
+    // open index file for writing
+    string indexFilename = bamFilename + Extension();
+    if ( !OpenIndexFile(indexFilename, "wb") ) {
+        fprintf(stderr, "ERROR: Could not open file to save index.\n");
+        return false;
+    }
+
+    // write index header data, then main index data
+    // N.B. - a write failure is reported through the return value rather than exit(1): this code
+    // is built into the spp R package, and exit() would terminate the hosting process
+    const char* errorMessage = 0;
+    if ( !WriteHeader() )
+        errorMessage = "ERROR: There was a problem writing index metadata to new index file.\n";
+    else if ( !WriteAllReferences() )
+        errorMessage = "ERROR: There was a problem writing index data to new index file.\n";
+
+    // close the file (fclose flushes any remaining output) and reset the handle, so that
+    // IsOpen() and the destructor never see a pointer that has already been closed
+    fclose(m_indexStream);
+    m_indexStream = 0;
+    if ( errorMessage != 0 ) {
+        fprintf(stderr, "%s", errorMessage);
+        return false;
+    }
+
+    // re-open index file for later reading
+    if ( !OpenIndexFile(indexFilename, "rb") ) {
+        fprintf(stderr, "ERROR: Could not open newly created index file for reading.\n");
+        return false;
+    }
+
+    // return success/failure of write
+    return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamIndex.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,145 @@
+// ***************************************************************************
+// BamIndex.h (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic BAM index interface
+// ***************************************************************************
+
+#ifndef BAM_INDEX_H
+#define BAM_INDEX_H
+
+#include <api_global.h>
+#include <BamAux.h>
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class BamReader;
+class BgzfData;
+
+namespace Internal {
+  class BamStandardIndex;
+  class BamToolsIndex;
+} // namespace Internal
+
+// --------------------------------------------------  
+// BamIndex base class - abstract interface (see the pure virtual methods) plus shared file handling
+class API_EXPORT BamIndex {
+
+    // specify index-caching behavior
+    //
+    // @FullIndexCaching - store entire index file contents in memory
+    // @LimitedIndexCaching - store only index data for current reference
+    //   being processed
+    // @NoIndexCaching - do not store any index data.  Load as needed to 
+    //   calculate jump offset
+    public: enum BamIndexCacheMode { FullIndexCaching = 0
+                                   , LimitedIndexCaching
+                                   , NoIndexCaching
+                                   };
+  
+    // ctor & dtor
+    public:
+        BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
+        virtual ~BamIndex(void);
+        
+    // index interface
+    public:
+        // creates index data (in-memory) from current reader data
+        virtual bool Build(void) =0;
+        // returns supported file extension
+        virtual const std::string Extension(void) const =0;
+        // returns whether reference has alignments or not
+        virtual bool HasAlignments(const int& referenceID) const =0;
+        // attempts to use index to jump to region; returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets a flag to indicate whether there are alignments 
+        //     available after the jump position
+        virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
+        // loads existing data from file into memory
+        virtual bool Load(const std::string& filename);
+        // change the index caching behavior
+        virtual void SetCacheMode(const BamIndexCacheMode mode);
+        // writes in-memory index data out to file 
+        // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
+        virtual bool Write(const std::string& bamFilename);
+        
+    // derived-classes MUST provide implementation
+    protected:
+        // clear all current index offset data in memory
+        virtual void ClearAllData(void) =0;
+        // return file position after header metadata
+        virtual const off_t DataBeginOffset(void) const =0;
+        // return true if all index data is cached
+        virtual bool HasFullDataCache(void) const =0;
+        // clears index data from all references except the first
+        virtual void KeepOnlyFirstReferenceOffsets(void) =0;
+        // load index data for all references, return true if loaded OK
+        // @saveData - save data in memory if true, just read & discard if false
+        virtual bool LoadAllReferences(bool saveData = true) =0;
+        // load first reference from file, return true if loaded OK
+        // @saveData - save data in memory if true, just read & discard if false
+        virtual bool LoadFirstReference(bool saveData = true) =0;
+        // load header data from index file, return true if loaded OK
+        virtual bool LoadHeader(void) =0;
+        // position file pointer to first reference begin, return true if skipped OK
+        virtual bool SkipToFirstReference(void) =0;
+        // write index reference data
+        virtual bool WriteAllReferences(void) =0;
+        // write index header data
+        virtual bool WriteHeader(void) =0;
+
+    // internal methods
+    protected:
+        // rewind index file to beginning of index data, return true if rewound OK
+        bool Rewind(void);
+
+    private:
+        // return true if FILE* is open
+        bool IsOpen(void) const;
+        // opens index file according to requested mode, return true if opened OK
+        bool OpenIndexFile(const std::string& filename, const std::string& mode);
+        // updates in-memory cache of index data, depending on current cache mode
+        void UpdateCache(void);
+
+    // factory methods for returning proper BamIndex-derived type based on available index files
+    public:
+      
+        // returns index based on BAM filename 'stub'
+        // checks first for preferred type, returns that type if found
+        // (if not found, attempts to load other type(s), returns 0 if NONE found)
+        //
+        // ** default preferred type is BamToolsIndex ** use this anytime it exists
+        enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
+        static BamIndex* FromBamFilename(const std::string&   bamFilename,
+                                         BamTools::BgzfData*  bgzf,
+                                         BamTools::BamReader* reader, 
+                                         const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
+        
+        // returns index based on explicitly named index file (or 0 if not found)
+        static BamIndex* FromIndexFilename(const std::string&   indexFilename,
+                                           BamTools::BgzfData*  bgzf,
+                                           BamTools::BamReader* reader);
+
+    // data members
+    protected:
+        BamTools::BgzfData*  m_BGZF;                // BGZF handle passed to the constructor
+        BamTools::BamReader* m_reader;              // reader passed to the constructor
+        BamTools::RefVector  m_references;          // reference data copied from the reader
+        BamIndex::BamIndexCacheMode m_cacheMode;    // currently selected caching behavior
+        FILE* m_indexStream;                        // index file stream (see IsOpen())
+
+
+    // friends - the concrete index implementations
+    friend class Internal::BamStandardIndex;
+    friend class Internal::BamToolsIndex;
+};
+
+} // namespace BamTools
+
+#endif // BAM_INDEX_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamMultiReader.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,450 @@
+// ***************************************************************************
+// BamMultiReader.cpp (c) 2010 Erik Garrison, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Uses BGZF routines that were adapted from the bgzf.c code developed at the
+// Broad Institute.
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files.
+//
+// This functionality allows applications to work on very large sets of files
+// without requiring intermediate merge, sort, and index steps for each file
+// subset.  It also improves the performance of our merge system as it
+// precludes the need to sort merged files.
+// ***************************************************************************
+
+#include <BamMultiReader.h>
+#include <BGZF.h>
+using namespace BamTools;
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+#include <string>
+#include <vector>
+using namespace std;
+
+// -----------------------------------------------------
+// BamMultiReader implementation
+// -----------------------------------------------------
+
+// Creates a multireader with no files attached; the position markers
+// (CurrentRefID, CurrentLeft) both start at zero.
+BamMultiReader::BamMultiReader(void)
+    : CurrentRefID(0)
+    , CurrentLeft(0)
+{
+    // nothing else to set up: readers are added later by Open()
+}
+
+// Releases every reader and cached alignment by delegating to Close().
+BamMultiReader::~BamMultiReader(void) {
+    this->Close();
+}
+
+// Closes every BAM reader owned by this multireader and frees all per-reader
+// resources (the reader itself and its cached alignment).
+//
+// After this call the multireader holds no readers and no cached alignments,
+// so HasOpenReaders() is false and a repeated Close() (e.g. from the
+// destructor) has nothing left to do.
+void BamMultiReader::Close(void) {
+
+    // Drop the sorted alignment index first: its entries are non-owning copies
+    // of the reader/alignment pointers deleted below.  Leaving them in place
+    // would let GetNextAlignment() dereference freed memory after Close().
+    alignments.clear();
+
+    // close all BAM readers and clean up pointers
+    vector<pair<BamReader*, BamAlignment*> >::iterator readerIter = readers.begin();
+    vector<pair<BamReader*, BamAlignment*> >::iterator readerEnd  = readers.end();
+    for ( ; readerIter != readerEnd; ++readerIter ) {
+
+        BamReader* reader = readerIter->first;
+
+        // close the reader before destroying it
+        if ( reader ) reader->Close();
+        delete reader;
+
+        // release the alignment buffer paired with this reader
+        delete readerIter->second;
+    }
+
+    // clear out the container so no dangling pointers remain
+    readers.clear();
+}
+
+// Asks every reader to build and save its index file; useStandardIndex is
+// forwarded unchanged to BamReader::CreateIndex().
+// All readers are attempted even after a failure; returns true only if every
+// one of them reported success.
+bool BamMultiReader::CreateIndexes(bool useStandardIndex) {
+    typedef vector<pair<BamReader*, BamAlignment*> >::iterator ReaderIterator;
+
+    bool allCreated = true;
+    for ( ReaderIterator entry = readers.begin(); entry != readers.end(); ++entry ) {
+        const bool created = entry->first->CreateIndex(useStandardIndex);
+        if ( !created ) allCreated = false;
+    }
+    return allCreated;
+}
+
+// Applies the given index caching mode to each underlying reader in turn.
+void BamMultiReader::SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode) {
+    typedef vector<pair<BamReader*, BamAlignment*> >::iterator ReaderIterator;
+
+    ReaderIterator entry = readers.begin();
+    while ( entry != readers.end() ) {
+        entry->first->SetIndexCacheMode(mode);
+        ++entry;
+    }
+}
+
+// Debugging aid: writes one line per cached alignment to stderr, formatted as
+// "<refID>:<position> <filename>", in the sorted order of the alignment index.
+void BamMultiReader::DumpAlignmentIndex(void) {
+    AlignmentIndex::const_iterator entry = alignments.begin();
+    while ( entry != alignments.end() ) {
+        const pair<int, int>& sortKey = entry->first;
+        const BamReader* owner = entry->second.first;
+        cerr << sortKey.first << ":" << sortKey.second << " " << owner->GetFilename() << endl;
+        ++entry;
+    }
+}
+
+// Builds a virtual, unified SAM header for all the BAM files in the multireader.
+//
+//  - @HD and @SQ lines are copied from the first reader only (and only when
+//    that reader's header text is non-empty).
+//  - @RG lines are collected from every reader and de-duplicated by their ID
+//    field.  A duplicate ID inside one and the same file is reported on stderr;
+//    the same ID appearing in different files is silently merged.
+//  - Any other header line is not carried over.
+//
+// Returns the merged header; every emitted line is terminated by '\n'.
+const string BamMultiReader::GetHeaderText(void) const {
+
+    string mergedHeader = "";
+    map<string, bool> readGroups;   // read group IDs already written to mergedHeader
+
+    // foreach extraction entry (each BAM file)
+    for (vector<pair<BamReader*, BamAlignment*> >::const_iterator rs = readers.begin(); rs != readers.end(); ++rs) {
+
+        BamReader* reader = rs->first;
+        const string headerText = reader->GetHeaderText();
+        if ( headerText.empty() ) continue;
+
+        const bool isFirstFile = ( rs == readers.begin() );
+        map<string, bool> currentFileReadGroups;   // IDs first contributed by this file
+
+        // walk the header one line at a time
+        stringstream header(headerText);
+        string headerLine;
+        while ( getline(header, headerLine) ) {
+
+            // skip empty lines
+            if ( headerLine.empty() ) continue;
+
+            // if first file, save HD & SQ entries
+            if ( isFirstFile && ( headerLine.find("@HD") == 0 || headerLine.find("@SQ") == 0 ) ) {
+                mergedHeader.append(headerLine);
+                mergedHeader.append(1, '\n');
+            }
+
+            // (for all files) only RG entries remain of interest
+            if ( headerLine.find("@RG") != 0 ) continue;
+
+            // Find the ID field among the tab-separated TAG:VALUE parts.  Only
+            // the first ':' separates tag from value, so the value is taken to
+            // be everything after it; cutting at a second ':' would conflate
+            // distinct IDs such as "run:1" and "run:2".
+            string readGroup;
+            stringstream headerLineSs(headerLine);
+            string part;
+            while ( getline(headerLineSs, part, '\t') ) {
+                const string::size_type colon = part.find(':');
+                if ( part.substr(0, colon) == "ID" ) {
+                    if ( colon != string::npos ) readGroup = part.substr(colon + 1);
+                    break;
+                }
+            }
+
+            if ( readGroups.find(readGroup) == readGroups.end() ) { // prevents duplicate @RG entries
+                mergedHeader.append(headerLine);
+                mergedHeader.append(1, '\n');
+                readGroups[readGroup] = true;
+                currentFileReadGroups[readGroup] = true;
+            }
+            // warn iff the duplicate comes from within the file being read;
+            // otherwise we emit no warning, as we might be merging multiple
+            // BAM files with identical @RG tags
+            else if ( currentFileReadGroups.find(readGroup) != currentFileReadGroups.end() ) {
+                cerr << "WARNING: duplicate @RG tag " << readGroup
+                    << " entry in header of " << reader->GetFilename() << endl;
+            }
+        }
+    }
+
+    // return merged header text
+    return mergedHeader;
+}
+
+// Retrieves the next alignment across all files, in (RefID, Position) order.
+// Copies it into nextAlignment and returns true; returns false once no reader
+// has a cached alignment left.
+bool BamMultiReader::GetNextAlignment(BamAlignment& nextAlignment) {
+
+    // an empty alignment index means there are no more alignments to process
+    if ( !HasOpenReaders() ) return false;
+
+    // keep CurrentRefID in step with the alignment about to be handed out
+    UpdateReferenceID();
+
+    // the index is keyed by (RefID, Position), so its first entry is the lowest
+    const AlignmentIndex::iterator lowest = alignments.begin();
+    BamReader* const    sourceReader = lowest->second.first;
+    BamAlignment* const cached       = lowest->second.second;
+
+    // give the caller its own copy, then retire the index entry
+    nextAlignment = *cached;
+    alignments.erase(lowest);
+
+    // refill the cached slot from the same reader; on failure the reader simply
+    // gets no new entry in the index
+    if ( sourceReader->GetNextAlignment(*cached) ) {
+        alignments.insert(make_pair(make_pair(cached->RefID, cached->Position),
+                                    make_pair(sourceReader, cached)));
+    }
+
+    return true;
+}
+
+// Same as GetNextAlignment(), except that the replacement alignment is fetched
+// with BamReader::GetNextAlignmentCore(), i.e. without parsing character data.
+// Returns false once no reader has a cached alignment left.
+bool BamMultiReader::GetNextAlignmentCore(BamAlignment& nextAlignment) {
+
+    // an empty alignment index means there are no more alignments to process
+    if ( !HasOpenReaders() ) return false;
+
+    // keep CurrentRefID in step with the alignment about to be handed out
+    UpdateReferenceID();
+
+    // the index is keyed by (RefID, Position), so its first entry is the lowest
+    const AlignmentIndex::iterator lowest = alignments.begin();
+    BamReader* const    sourceReader = lowest->second.first;
+    BamAlignment* const cached       = lowest->second.second;
+
+    // give the caller its own copy, then retire the index entry
+    nextAlignment = *cached;
+    alignments.erase(lowest);
+
+    // refill the cached slot from the same reader; on failure the reader simply
+    // gets no new entry in the index
+    if ( sourceReader->GetNextAlignmentCore(*cached) ) {
+        alignments.insert(make_pair(make_pair(cached->RefID, cached->Position),
+                                    make_pair(sourceReader, cached)));
+    }
+
+    return true;
+}
+
+// ---------------------------------------------------------------------------------------
+//
+// NB: The following GetReferenceX() functions assume that we have identical 
+// references for all BAM files.  We enforce this by invoking the validation
+// function (ValidateReaders, defined below) to verify that our reference data
+// is the same across all files on Open, so we will not encounter a situation 
+// in which there is a mismatch and we are still live.
+//
+// ---------------------------------------------------------------------------------------
+
+// Returns the number of reference sequences, as reported by the first reader.
+// With no readers loaded there are no references, so 0 is returned instead of
+// calling front() on an empty container.
+const int BamMultiReader::GetReferenceCount(void) const {
+    if ( readers.empty() ) return 0;
+    return readers.front().first->GetReferenceCount();
+}
+
+// Returns a copy of the reference entries, as reported by the first reader.
+// With no readers loaded an empty vector is returned instead of calling
+// front() on an empty container.
+const BamTools::RefVector BamMultiReader::GetReferenceData(void) const {
+    if ( readers.empty() ) return BamTools::RefVector();
+    return readers.front().first->GetReferenceData();
+}
+
+// Looks up the reference ID for refName by asking the first reader.
+// Precondition: at least one reader is loaded (readers.front() is used
+// without an emptiness check).
+const int BamMultiReader::GetReferenceID(const string& refName) const {
+    const BamReader* const firstReader = readers.front().first;
+    return firstReader->GetReferenceID(refName);
+}
+
+// ---------------------------------------------------------------------------------------
+
+// Reports whether any reader still has an alignment cached in the index.
+bool BamMultiReader::HasOpenReaders() {
+    return !alignments.empty();
+}
+
+// Returns true only if every (non-null) underlying reader reports an index;
+// null reader entries are ignored and an empty reader set yields true.
+// Useful to tell whether Jump() or SetRegion() are possible.
+bool BamMultiReader::IsIndexLoaded(void) const {
+    typedef vector<pair<BamReader*, BamAlignment*> >::const_iterator ReaderIterator;
+
+    for ( ReaderIterator entry = readers.begin(); entry != readers.end(); ++entry ) {
+        const BamReader* reader = entry->first;
+        if ( reader == 0 ) continue;
+
+        // one reader without an index is enough to answer "no"
+        if ( !reader->IsIndexLoaded() ) return false;
+    }
+    return true;
+}
+
+// Jumps every reader to (refID, position) and refreshes the alignment cache.
+// Records the target in CurrentRefID / CurrentLeft first.
+// If any reader fails to jump, an error is written to stderr and the program
+// terminates with exit(1); otherwise true is returned.
+// NB: refID and position are not range-checked against the reference data here.
+bool BamMultiReader::Jump(int refID, int position) {
+    typedef vector<pair<BamReader*, BamAlignment*> >::iterator ReaderIterator;
+
+    CurrentRefID = refID;
+    CurrentLeft  = position;
+
+    for ( ReaderIterator entry = readers.begin(); entry != readers.end(); ++entry ) {
+        BamReader* const reader = entry->first;
+        if ( reader->Jump(refID, position) ) continue;
+
+        // a single failed jump is fatal
+        cerr << "ERROR: could not jump " << reader->GetFilename() << " to " << refID << ":" << position << endl;
+        exit(1);
+    }
+
+    // reaching this point means every reader jumped successfully
+    UpdateAlignments();
+    return true;
+}
+
+// Opens the given BAM files and primes the alignment index with the first
+// alignment of each file.
+//
+// @filenames           - BAM files to open (also remembered in fileNames)
+// @openIndexes         - forwarded to BamReader::Open() as its lookForIndex flag
+// @coreMode            - read the first alignment with GetNextAlignmentCore()
+//                        instead of GetNextAlignment()
+// @preferStandardIndex - forwarded to BamReader::Open()
+//
+// Returns false if any file cannot be opened, if the only file given has no
+// readable first alignment, or if no reader ends up usable at all.  A file
+// that opens but yields no alignment is otherwise skipped with a warning.
+// Readers that were added before a failure stay loaded; Close() releases them.
+// On success ValidateReaders() is run, which exits the program if the files
+// disagree on their reference sequences.
+bool BamMultiReader::Open(const vector<string>& filenames, bool openIndexes, bool coreMode, bool preferStandardIndex) {
+
+    // for filename in filenames
+    fileNames = filenames; // save filenames in our multireader
+    for (vector<string>::const_iterator it = filenames.begin(); it != filenames.end(); ++it) {
+
+        const string& filename = *it;
+        BamReader* reader = new BamReader;
+
+        // TODO; any further error handling when the file cannot be opened ??
+        if ( !reader->Open(filename, "", openIndexes, preferStandardIndex) ) {
+            delete reader;   // not stored anywhere yet, so free it here
+            return false;
+        }
+
+        // file opened ok, check that it can be read
+        BamAlignment* alignment = new BamAlignment;
+        const bool fileOK = ( coreMode ? reader->GetNextAlignmentCore(*alignment)
+                                       : reader->GetNextAlignment(*alignment) );
+
+        if ( fileOK ) {
+            readers.push_back(make_pair(reader, alignment)); // store pointers to our readers for cleanup
+            alignments.insert(make_pair(make_pair(alignment->RefID, alignment->Position),
+                                        make_pair(reader, alignment)));
+            continue;
+        }
+
+        cerr << "WARNING: could not read first alignment in " << filename << ", ignoring file" << endl;
+
+        // the ignored file's reader and alignment buffer were never stored in
+        // 'readers', so Close() would not reach them: release them now
+        delete alignment;
+        delete reader;
+
+        // if only file available & could not be read, return failure
+        if ( filenames.size() == 1 ) return false;
+    }
+
+    // no usable reader at all (empty file list, or every file was ignored):
+    // there is nothing to validate or read from
+    if ( readers.empty() ) return false;
+
+    // files opened ok, at least one alignment could be read,
+    // now need to check that all files use same reference data
+    ValidateReaders();
+    return true;
+}
+
+// Writes the filename of every loaded reader to stdout, one per line.
+void BamMultiReader::PrintFilenames(void) {
+    typedef vector<pair<BamReader*, BamAlignment*> >::iterator ReaderIterator;
+
+    ReaderIterator entry = readers.begin();
+    while ( entry != readers.end() ) {
+        cout << entry->first->GetFilename() << endl;
+        ++entry;
+    }
+}
+
+// Rewinds every reader to the beginning of its alignment data.
+// All readers are attempted even after a failure; returns true only if every
+// rewind succeeded.  This function itself does not touch the cached
+// 'alignments' index.
+bool BamMultiReader::Rewind(void) {
+    typedef vector<pair<BamReader*, BamAlignment*> >::iterator ReaderIterator;
+
+    bool allRewound = true;
+    for ( ReaderIterator entry = readers.begin(); entry != readers.end(); ++entry ) {
+        const bool rewound = entry->first->Rewind();
+        if ( !rewound ) allRewound = false;
+    }
+    return allRewound;
+}
+
+// Convenience overload: packs the four bounds into a BamRegion and forwards
+// to SetRegion(const BamRegion&).
+bool BamMultiReader::SetRegion(const int& leftRefID, const int& leftPosition, const int& rightRefID, const int& rightPosition) {
+    return SetRegion( BamRegion(leftRefID, leftPosition, rightRefID, rightPosition) );
+}
+
+// Sets the target region on every reader, stores it in Region and refreshes
+// the alignment cache.  Always returns true: a reader that fails to set the
+// region is only reported on stderr.
+bool BamMultiReader::SetRegion(const BamRegion& region) {
+    typedef vector<pair<BamReader*, BamAlignment*> >::iterator ReaderIterator;
+
+    Region = region;
+
+    // NB: While it may make sense to track readers in which we can
+    // successfully SetRegion, in practice a failure of SetRegion means "no
+    // alignments here."  It makes sense to simply accept the failure,
+    // UpdateAlignments(), and continue.
+    for ( ReaderIterator entry = readers.begin(); entry != readers.end(); ++entry ) {
+        BamReader* const reader = entry->first;
+        if ( reader->SetRegion(region) ) continue;
+
+        cerr << "ERROR: could not jump " << reader->GetFilename() << " to "
+            << region.LeftRefID << ":" << region.LeftPosition
+            << ".." << region.RightRefID << ":" << region.RightPosition << endl;
+    }
+
+    UpdateAlignments();
+    return true;
+}
+
+// Rebuilds the alignment index from scratch: reads the next alignment from
+// every reader (via GetNextAlignment) into that reader's buffer and indexes it
+// by (RefID, Position).
+void BamMultiReader::UpdateAlignments(void) {
+    typedef vector<pair<BamReader*, BamAlignment*> >::iterator ReaderIterator;
+
+    alignments.clear();
+
+    for ( ReaderIterator entry = readers.begin(); entry != readers.end(); ++entry ) {
+        BamReader* const    source = entry->first;
+        BamAlignment* const cached = entry->second;
+
+        // a failed read is assumed to mean end of region / EOF: the reader
+        // simply contributes no entry
+        if ( !source->GetNextAlignment(*cached) ) continue;
+
+        alignments.insert(make_pair(make_pair(cached->RefID, cached->Position),
+                                    make_pair(source, cached)));
+    }
+}
+
+// Updates the reference id stored in the BamMultiReader (CurrentRefID) to
+// reflect the current state of the readers.
+//
+// The alignment index is sorted by (RefID, Position), so its first entry
+// always carries the lowest reference ID among the cached alignments; that ID
+// becomes CurrentRefID.  With an empty index there is nothing to look at and
+// CurrentRefID is left unchanged.
+void BamMultiReader::UpdateReferenceID(void) {
+
+    // begin() must not be dereferenced on an empty index
+    if ( alignments.empty() ) return;
+
+    // Assign directly rather than counting up from CurrentRefID one step at a
+    // time: counting up never reaches a front RefID that is lower than
+    // CurrentRefID (e.g. -1) without overflowing the counter.
+    CurrentRefID = alignments.begin()->second.second->RefID;
+}
+
+// ValidateReaders checks that all the readers point to BAM files representing
+// alignments against the same set of reference sequences, and that the
+// sequences are identically ordered.  If these checks fail the operation of
+// the multireader is undefined, so we describe the mismatch on stderr and
+// force program exit via exit(1).
+//
+// With no readers loaded there is nothing to compare and the call returns
+// immediately.
+void BamMultiReader::ValidateReaders(void) const {
+
+    // readers.front() is only valid on a non-empty container
+    if ( readers.empty() ) return;
+
+    // the first reader serves as the reference point for all others
+    BamReader* firstReader = readers.front().first;
+    const int firstRefCount = firstReader->GetReferenceCount();
+    const BamTools::RefVector& firstRefData = firstReader->GetReferenceData();
+
+    for (vector<pair<BamReader*, BamAlignment*> >::const_iterator it = readers.begin(); it != readers.end(); ++it) {
+
+        BamReader* reader = it->first;
+        const BamTools::RefVector& currentRefData = reader->GetReferenceData();
+
+        // the number of references has to agree before contents are compared
+        if (reader->GetReferenceCount() != firstRefCount || firstRefData.size() != currentRefData.size()) {
+            cerr << "ERROR: mismatched number of references in " << reader->GetFilename()
+                      << " expected " << firstRefCount
+                      << " reference sequences but only found " << reader->GetReferenceCount() << endl;
+            exit(1);
+        }
+
+        // walking both vectors in lock-step is safe: we just checked above that
+        // they are identically sized; here we check they are equal in content
+        BamTools::RefVector::const_iterator f = firstRefData.begin();
+        BamTools::RefVector::const_iterator c = currentRefData.begin();
+        for ( ; f != firstRefData.end(); ++f, ++c ) {
+
+            if ( f->RefName == c->RefName && f->RefLength == c->RefLength ) continue;
+
+            cerr << "ERROR: mismatched references found in " << reader->GetFilename()
+                      << " expected: " << endl;
+            for (BamTools::RefVector::const_iterator a = firstRefData.begin(); a != firstRefData.end(); ++a)
+                cerr << a->RefName << " " << a->RefLength << endl;
+            cerr << "but found: " << endl;
+            for (BamTools::RefVector::const_iterator a = currentRefData.begin(); a != currentRefData.end(); ++a)
+                cerr << a->RefName << " " << a->RefLength << endl;
+            exit(1);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamMultiReader.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,136 @@
+// ***************************************************************************
+// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Functionality for simultaneously reading multiple BAM files
+// ***************************************************************************
+
+#ifndef BAMMULTIREADER_H
+#define BAMMULTIREADER_H
+
+#include <api_global.h>
+#include <BamReader.h>
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace BamTools {
+
+// index mapping reference/position pairings to bamreaders and their alignments
+typedef std::multimap<std::pair<int, int>, std::pair<BamReader*, BamAlignment*> > AlignmentIndex;
+
+// Reads several BAM files at once and hands their alignments back as a single
+// stream ordered by (reference ID, position).  One alignment per reader is kept
+// cached; the lowest cached alignment is always the next one returned.
+class API_EXPORT BamMultiReader {
+
+    // constructor / destructor
+    public:
+        // starts with no readers; CurrentRefID and CurrentLeft are set to 0
+        BamMultiReader(void);
+        // calls Close()
+        ~BamMultiReader(void);
+
+    // public interface
+    public:
+
+        // positioning
+        // CurrentRefID is written by Jump() and UpdateReferenceID();
+        // CurrentLeft is written by Jump()
+        int CurrentRefID;
+        int CurrentLeft;
+
+        // region under analysis, specified using SetRegion
+        BamRegion Region;
+
+        // ----------------------
+        // BAM file operations
+        // ----------------------
+
+        // closes all BAM files and deletes the readers and their cached alignments
+        void Close(void);
+
+        // opens BAM files (and optional BAM index files, if provided)
+        // @openIndexes - triggers index opening, useful for suppressing
+        // error messages during merging of files in which we may not have
+        // indexes.
+        // @coreMode - setup our first alignments using GetNextAlignmentCore();
+        // also useful for merging
+        // @preferStandardIndex - look for standard BAM index ".bai" first.  If false,
+        // will look for BamTools index ".bti".
+        // returns false if a file cannot be opened, or if the only file given
+        // has no readable first alignment; runs ValidateReaders() on success
+        bool Open(const std::vector<std::string>& filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false);
+
+        // returns whether underlying BAM readers ALL have an index loaded
+        // this is useful to indicate whether Jump() or SetRegion() are possible
+        bool IsIndexLoaded(void) const;
+        
+        // performs random-access jump to reference, position on every reader
+        // NB: the implementation calls exit(1) if any reader fails to jump
+        bool Jump(int refID, int position = 0);
+
+        // sets the target region on every reader and refreshes the alignment cache
+        // NB: always returns true; per-reader failures are only reported on stderr
+        bool SetRegion(const BamRegion& region);
+        bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above
+
+        // returns file pointers to beginning of alignments
+        // (true only if every reader could be rewound)
+        bool Rewind(void);
+
+        // ----------------------
+        // access alignment data
+        // ----------------------
+        // updates the reference id marker to match the lower limit of our readers
+        void UpdateReferenceID(void);
+
+        // retrieves next available alignment (returns success/fail) from all files
+        bool GetNextAlignment(BamAlignment&);
+        // retrieves next available alignment (returns success/fail) from all files
+        // and populates the support data with information about the alignment
+        // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT
+        bool GetNextAlignmentCore(BamAlignment&);
+        // returns true while at least one alignment is cached in the alignment index
+        // ... should this be private?
+        bool HasOpenReaders(void);
+
+        // ----------------------
+        // access auxiliary data
+        // ----------------------
+
+        // returns unified SAM header text for all files
+        // (@HD/@SQ lines from the first file, @RG lines from all files, de-duplicated)
+        const std::string GetHeaderText(void) const;
+        // returns number of reference sequences (taken from the first reader)
+        const int GetReferenceCount(void) const;
+        // returns vector of reference objects (taken from the first reader)
+        const BamTools::RefVector GetReferenceData(void) const;
+        // returns reference id (used for BamMultiReader::Jump()) for the given reference name
+        const int GetReferenceID(const std::string& refName) const;
+        // validates that we have a congruent set of BAM files that are aligned against the same reference sequences
+        // NB: the implementation calls exit(1) when a mismatch is found
+        void ValidateReaders() const;
+
+        // ----------------------
+        // BAM index operations
+        // ----------------------
+
+        // creates an index for every BAM file and saves it to file
+        // (standard ".bai" index by default, BamTools ".bti" index if flag is false)
+        bool CreateIndexes(bool useStandardIndex = true);
+
+        // sets the index caching mode for the readers
+        void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);
+
+        //const int GetReferenceID(const string& refName) const;
+
+        // utility
+        void PrintFilenames(void);       // prints each reader's filename to stdout
+        void DumpAlignmentIndex(void);   // prints the alignment index to stderr (debugging)
+        void UpdateAlignments(void); // updates our alignment cache
+
+    // private implementation
+    private:
+
+        // the set of readers and alignments which we operate on, maintained throughout the life of this class
+        // (both pointers of each pair are owned here and deleted by Close())
+        std::vector<std::pair<BamReader*, BamAlignment*> > readers;
+
+        // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment
+        // when a reader reaches EOF, its entry is removed from this index
+        // (entries are non-owning copies of the pointers held in 'readers')
+        AlignmentIndex alignments;
+
+        // filenames passed to the most recent Open() call
+        std::vector<std::string> fileNames;
+};
+
+} // namespace BamTools
+
+#endif // BAMMULTIREADER_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamReader.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,66 @@
+// ***************************************************************************
+// BamReader.cpp (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 22 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#include <BamReader.h>
+#include <BamReader_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <string>
+#include <vector>
+using namespace std;
+
+// Allocates the private implementation object, handing it a back-pointer to
+// this reader.
+BamReader::BamReader(void)
+{
+    d = new BamReaderPrivate(this);
+}
+
+// Destroys the private implementation object and resets the pointer.
+BamReader::~BamReader(void)
+{
+    delete d;
+    d = 0;
+}
+
+// ----------------------
+// file operations
+// (thin forwarders to the private implementation object 'd')
+// ----------------------
+
+void BamReader::Close(void) {
+    d->Close();
+}
+
+bool BamReader::HasIndex(void) const {
+    return d->HasIndex;
+}
+
+// deprecated alias of HasIndex()
+bool BamReader::IsIndexLoaded(void) const {
+    return HasIndex();
+}
+
+bool BamReader::IsOpen(void) const {
+    return d->mBGZF.IsOpen;
+}
+
+// a jump is a SetRegion() with only the left bound given
+bool BamReader::Jump(int refID, int position) {
+    const BamRegion leftBoundOnly(refID, position);
+    return d->SetRegion(leftBoundOnly);
+}
+
+bool BamReader::Open(const std::string& filename,
+                     const std::string& indexFilename,
+                     const bool lookForIndex,
+                     const bool preferStandardIndex)
+{
+    return d->Open(filename, indexFilename, lookForIndex, preferStandardIndex);
+}
+
+bool BamReader::Rewind(void) {
+    return d->Rewind();
+}
+
+bool BamReader::SetRegion(const BamRegion& region) {
+    return d->SetRegion(region);
+}
+
+bool BamReader::SetRegion(const int& leftRefID, const int& leftBound, const int& rightRefID, const int& rightBound) {
+    const BamRegion bounded(leftRefID, leftBound, rightRefID, rightBound);
+    return d->SetRegion(bounded);
+}
+
+// ----------------------
+// access alignment data
+// (thin forwarders to the private implementation object 'd')
+// ----------------------
+
+bool BamReader::GetNextAlignment(BamAlignment& bAlignment) {
+    return d->GetNextAlignment(bAlignment);
+}
+
+bool BamReader::GetNextAlignmentCore(BamAlignment& bAlignment) {
+    return d->GetNextAlignmentCore(bAlignment);
+}
+
+// ----------------------
+// access auxiliary data
+// (thin forwarders to the private implementation object 'd')
+// ----------------------
+
+const string BamReader::GetHeaderText(void) const {
+    return d->GetHeaderText();
+}
+
+// the count is the size of the private reference vector
+int BamReader::GetReferenceCount(void) const {
+    return d->References.size();
+}
+
+const RefVector& BamReader::GetReferenceData(void) const {
+    return d->References;
+}
+
+int BamReader::GetReferenceID(const string& refName) const {
+    return d->GetReferenceID(refName);
+}
+
+const std::string BamReader::GetFilename(void) const {
+    return d->Filename;
+}
+
+// ----------------------
+// index operations
+// (thin forwarders to the private implementation object 'd')
+// ----------------------
+
+bool BamReader::CreateIndex(bool useStandardIndex) {
+    return d->CreateIndex(useStandardIndex);
+}
+
+void BamReader::SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode) {
+    d->SetIndexCacheMode(mode);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamReader.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,130 @@
+// ***************************************************************************
+// BamReader.h (c) 2009 Derek Barnett, Michael Strömberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#ifndef BAMREADER_H
+#define BAMREADER_H
+
+#include <api_global.h>
+#include <BamAlignment.h>
+#include <BamIndex.h>
+#include <string>
+
+namespace BamTools {
+  
+namespace Internal {
+    class BamReaderPrivate;
+} // namespace Internal
+
+// Public interface for reading a single BAM file.  All work is delegated to a
+// private implementation object (Internal::BamReaderPrivate) held in 'd'.
+class API_EXPORT BamReader {
+
+    // constructor / destructor
+    public:
+        BamReader(void);
+        ~BamReader(void);
+
+    // public interface
+    public:
+
+        // ----------------------
+        // BAM file operations
+        // ----------------------
+
+        // close BAM file
+        void Close(void);
+        // returns whether reader is open for reading or not
+        bool IsOpen(void) const;
+        // performs random-access jump using (reference, position) as a left-bound
+        // (equivalent to SetRegion() with only the left bound specified)
+        bool Jump(int refID, int position = 0);
+        // opens BAM file (and optional BAM index file, if provided)
+        // @lookForIndex - if no indexFilename provided, look in BAM file's directory for an existing index file
+        //   default behavior is to skip index file search if no index filename given
+        // @preferStandardIndex - if true, give priority in index file searching to standard BAM index (*.bai)
+        //   default behavior is to prefer the BamToolsIndex (*.bti) if both are available
+        bool Open(const std::string& filename, 
+                  const std::string& indexFilename = "", 
+                  const bool lookForIndex = false, 
+                  const bool preferStandardIndex = false);
+        // returns file pointer to beginning of alignments
+        bool Rewind(void);
+        // sets a region of interest (with left & right bound reference/position)
+        // returns success/failure of seeking to left bound of region
+        bool SetRegion(const BamRegion& region);
+        bool SetRegion(const int& leftRefID, const int& leftBound, const int& rightRefID, const int& rightBound);
+
+        // ----------------------
+        // access alignment data
+        // ----------------------
+
+        // retrieves next available alignment (returns success/fail)
+        bool GetNextAlignment(BamAlignment& bAlignment);
+        // retrieves next available alignment core data (returns success/fail)
+        // ** DOES NOT parse any character data (read name, bases, qualities, tag data) **
+        // useful for operations requiring ONLY aligner-related information 
+        // (refId/position, alignment flags, CIGAR, mapQuality, etc)
+        bool GetNextAlignmentCore(BamAlignment& bAlignment);
+
+        // ----------------------
+        // access auxiliary data
+        // ----------------------
+
+        // returns SAM header text
+        const std::string GetHeaderText(void) const;
+        // returns number of reference sequences
+        int GetReferenceCount(void) const;
+        // returns vector of reference objects
+        // (a reference to data held by the private implementation object)
+        const BamTools::RefVector& GetReferenceData(void) const;
+        // returns reference id (used for BamReader::Jump()) for the given reference name
+        int GetReferenceID(const std::string& refName) const;
+        // returns the name of the file associated with this BamReader
+        const std::string GetFilename(void) const;
+
+        // ----------------------
+        // BAM index operations
+        // ----------------------
+
+        // creates index for BAM file, saves to file
+        // default behavior is to create the BAM standard index (".bai")
+        // set flag to false to create the BamTools-specific index (".bti")
+        bool CreateIndex(bool useStandardIndex = true);
+        // returns whether index data is available for reading
+        // (e.g. if true, BamReader should be able to seek to a region)
+        bool HasIndex(void) const;
+        // change the index caching behavior
+        // default BamReader/Index mode is LimitedIndexCaching
+        // @mode - can be either FullIndexCaching, LimitedIndexCaching,
+        //   or NoIndexCaching. See BamIndex.h for more details
+        void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);
+
+    // deprecated methods
+    public:
+
+        // deprecated (but still available): prefer HasIndex() instead
+        //
+        // Deprecated purely for API semantic clarity - HasIndex() should be clearer
+        // than IsIndexLoaded() in light of the new caching modes that may clear the
+        // index data from memory, but leave the index file open for later random access
+        // seeks.
+        //
+        // For example, what would (IsIndexLoaded() == true) mean when cacheMode has been
+        // explicitly set to NoIndexCaching? This is confusing at best, misleading about
+        // current memory behavior at worst.
+        //
+        // returns whether index data is available
+        // (e.g. if true, BamReader should be able to seek to a region)
+        // (the implementation simply forwards to HasIndex())
+        bool IsIndexLoaded(void) const;
+        
+    // private implementation
+    private:
+        // owning pointer: allocated in the constructor, deleted in the destructor.
+        // NOTE(review): this class declares no copy constructor or assignment
+        // operator, so a copied BamReader would share (and delete) the same 'd'.
+        Internal::BamReaderPrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMREADER_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamReader_p.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,729 @@
+// ***************************************************************************
+// BamReader_p.cpp (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 22 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#include <BamReader.h>
+#include <BGZF.h>
+#include <BamReader_p.h>
+#include <BamStandardIndex_p.h>
+#include <BamToolsIndex_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <algorithm>
+#include <iostream>
+#include <iterator>
+#include <vector>
+using namespace std;
+
+// Constructs the private implementation behind a BamReader.
+// No file is opened here: the header text starts empty, no index is attached,
+// the index cache mode defaults to LimitedIndexCaching, and no region
+// restriction is in effect (HasAlignmentsInRegion starts out true).
+// @parent - the public BamReader this object works for; it is passed on to
+//           any index object created later
+BamReaderPrivate::BamReaderPrivate(BamReader* parent) :
+    HeaderText(""),
+    Index(0),
+    HasIndex(false),
+    AlignmentsBeginOffset(0),
+    IndexCacheMode(BamIndex::LimitedIndexCaching),
+    IsBigEndian(SystemIsBigEndian()),
+    HasAlignmentsInRegion(true),
+    Parent(parent),
+    DNA_LOOKUP("=ACMGRSVTWYHKDBN"),  // indexed by the 4-bit packed base code
+    CIGAR_LOOKUP("MIDNSHP")          // indexed by the CIGAR operation code
+{
+}
+
+// Destructor: all cleanup (file stream, index, header text, region) is
+// delegated to Close().
+BamReaderPrivate::~BamReaderPrivate(void)
+{
+    Close();
+}
+
+// Moves the left edge of a requested region forward to the first reference
+// that actually has alignments, according to the index.
+//
+// References are examined from region.LeftRefID up to the right-bound
+// reference (or the last known reference when no right bound is given).
+// HasAlignmentsInRegion records whether any of them has data. When the first
+// reference with data is not the requested left reference, the region is
+// changed to start at position 0 of that reference.
+//
+// Does nothing (and leaves HasAlignmentsInRegion untouched) if no index
+// object is attached.
+void BamReaderPrivate::AdjustRegion(BamRegion& region) {
+
+    // nothing can be checked without an index
+    if ( Index == 0 )
+        return;
+
+    // last reference ID worth inspecting
+    int lastRefId;
+    if ( region.isRightBoundSpecified() )
+        lastRefId = region.RightRefID;
+    else
+        lastRefId = References.size() - 1;
+
+    // assume the region is empty until a reference with data turns up
+    HasAlignmentsInRegion = false;
+    for ( int refId = region.LeftRefID; refId <= lastRefId; ++refId ) {
+
+        if ( !Index->HasAlignments(refId) )
+            continue;
+
+        // first reference with data found
+        HasAlignmentsInRegion = true;
+
+        // requested left reference was empty: start at the top of this one instead
+        if ( refId != region.LeftRefID ) {
+            region.LeftRefID    = refId;
+            region.LeftPosition = 0;
+        }
+        return;
+    }
+}
+
+// fills out character data for BamAlignment data
+//
+// Decodes the raw bytes stored in bAlignment.SupportData.AllCharData into
+// Name, QueryBases, Qualities, AlignedBases and TagData, then clears the
+// HasCoreOnly flag. The bytes are consumed in this order:
+//   read name (NUL-terminated), CIGAR (4 bytes per op), packed bases
+//   (two per byte), qualities (one byte per base), tag data.
+//
+// @bAlignment - alignment whose SupportData and CigarData have already been
+//               populated; modified in place
+// returns true on success, false if the lengths declared in SupportData do
+// not fit inside the stored character data (malformed or truncated record)
+//
+// N.B. - an unknown CIGAR op, or (on big-endian hosts only) an unknown tag
+// value type, still terminates the process via exit(1).
+bool BamReaderPrivate::BuildCharData(BamAlignment& bAlignment) {
+
+    // a block shorter than the fixed-size core section holds no character data
+    // (and would make the unsigned subtraction below wrap around)
+    if ( bAlignment.SupportData.BlockLength < (unsigned int)BAM_CORE_SIZE ) return false;
+
+    // calculate character lengths/offsets
+    const unsigned int seqLength     = bAlignment.SupportData.QuerySequenceLength;
+    const unsigned int dataLength    = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;
+    const unsigned int seqDataOffset = bAlignment.SupportData.QueryNameLength + (bAlignment.SupportData.NumCigarOperations * 4);
+
+    // make sure name + CIGAR + bases + qualities fit in the stored data before
+    // touching any of it; done in 64 bits so a bogus sequence length cannot wrap
+    const uint64_t requiredLength = (uint64_t)seqDataOffset + ((uint64_t)seqLength + 1)/2 + (uint64_t)seqLength;
+    if ( requiredLength > dataLength ) return false;
+    if ( bAlignment.SupportData.AllCharData.size() < dataLength ) return false;
+
+    // remaining offsets are now known not to overflow
+    const unsigned int qualDataOffset = seqDataOffset + (seqLength+1)/2;
+    const unsigned int tagDataOffset  = qualDataOffset + seqLength;
+    const unsigned int tagDataLength  = dataLength - tagDataOffset;
+
+    // check offsets to see what char data exists
+    const bool hasSeqData  = ( seqDataOffset  < dataLength );
+    const bool hasQualData = ( qualDataOffset < dataLength );
+    const bool hasTagData  = ( tagDataOffset  < dataLength );
+
+    // set up char buffers
+    // (tagData is non-const because big-endian hosts byte-swap tag values in place)
+    const char* allCharData = bAlignment.SupportData.AllCharData.data();
+    const char* seqData     = ( hasSeqData  ? (((const char*)allCharData) + seqDataOffset)  : (const char*)0 );
+    const char* qualData    = ( hasQualData ? (((const char*)allCharData) + qualDataOffset) : (const char*)0 );
+          char* tagData     = ( hasTagData  ? (((char*)allCharData) + tagDataOffset)        : (char*)0 );
+
+    // store alignment name (relies on null char in name as terminator)
+    bAlignment.Name.assign((const char*)(allCharData));
+
+    // save query sequence: two bases per byte, high nibble first
+    bAlignment.QueryBases.clear();
+    if ( hasSeqData ) {
+        bAlignment.QueryBases.reserve(seqLength);
+        for (unsigned int i = 0; i < seqLength; ++i) {
+            char singleBase = DNA_LOOKUP[ ( (seqData[(i/2)] >> (4*(1-(i%2)))) & 0xf ) ];
+            bAlignment.QueryBases.append(1, singleBase);
+        }
+    }
+
+    // save qualities, converting from numeric QV to 'FASTQ-style' ASCII character
+    bAlignment.Qualities.clear();
+    if ( hasQualData ) {
+        bAlignment.Qualities.reserve(seqLength);
+        for (unsigned int i = 0; i < seqLength; ++i) {
+            char singleQuality = (char)(qualData[i]+33);
+            bAlignment.Qualities.append(1, singleQuality);
+        }
+    }
+
+    // if QueryBases is empty (and this is a allowed case)
+    if ( bAlignment.QueryBases.empty() )
+        bAlignment.AlignedBases = bAlignment.QueryBases;
+
+    // if QueryBases contains data, then build AlignedBases using CIGAR data
+    else {
+
+        // resize AlignedBases
+        bAlignment.AlignedBases.clear();
+        bAlignment.AlignedBases.reserve(seqLength);
+
+        // iterate over CigarOps; k tracks the current offset into QueryBases
+        int k = 0;
+        vector<CigarOp>::const_iterator cigarIter = bAlignment.CigarData.begin();
+        vector<CigarOp>::const_iterator cigarEnd  = bAlignment.CigarData.end();
+        for ( ; cigarIter != cigarEnd; ++cigarIter ) {
+
+            const CigarOp& op = (*cigarIter);
+            switch(op.Type) {
+
+            case ('M') :
+            case ('I') :
+                bAlignment.AlignedBases.append(bAlignment.QueryBases.substr(k, op.Length)); // for 'M', 'I' - write bases
+                // fall through
+
+            case ('S') :
+                k += op.Length;                                     // for 'S' - soft clip, skip over query bases
+                break;
+
+            case ('D') :
+                bAlignment.AlignedBases.append(op.Length, '-');     // for 'D' - write gap character
+                break;
+
+            case ('P') :
+                bAlignment.AlignedBases.append( op.Length, '*' );   // for 'P' - write padding character
+                break;
+
+            case ('N') :
+                bAlignment.AlignedBases.append( op.Length, 'N' );  // for 'N' - write N's, skip bases in original query sequence
+                break;
+
+            case ('H') :
+                break;  // for 'H' - hard clip, do nothing to AlignedBases, move to next op
+
+            default:
+                fprintf(stderr, "ERROR: Invalid Cigar op type\n"); // shouldn't get here
+                exit(1);
+            }
+        }
+    }
+
+    // save tag data
+    bAlignment.TagData.clear();
+    if ( hasTagData ) {
+
+        // on big-endian hosts, multi-byte tag values are byte-swapped in place
+        // before being copied out; the walk stops quietly at the first entry
+        // that does not fit in the remaining bytes, leaving the rest untouched
+        if ( IsBigEndian ) {
+            unsigned int i = 0;
+            while ( i < tagDataLength ) {
+
+                // each entry needs a 2-char tag name plus a 1-char value type
+                if ( tagDataLength - i < 3 ) break;
+
+                i += 2; // skip tag type (e.g. "RG", "NM", etc)
+                // lower & upper case letters have same meaning
+                // (cast first: toupper() is undefined for negative char values)
+                const uint8_t type = (uint8_t)toupper((unsigned char)tagData[i]);
+                ++i;    // skip value type
+
+                // size in bytes of a fixed-width value of this type
+                unsigned int valueSize = 0;
+                switch (type) {
+
+                    case('A') :
+                    case('C') :
+                        valueSize = 1;
+                        break;
+
+                    case('S') :
+                        valueSize = sizeof(uint16_t);
+                        break;
+
+                    case('F') :
+                    case('I') :
+                        valueSize = sizeof(uint32_t);
+                        break;
+
+                    case('D') :
+                        valueSize = sizeof(uint64_t);
+                        break;
+
+                    case('H') :
+                    case('Z') :
+                        // NUL-terminated text: nothing to swap, just skip it
+                        while ( i < tagDataLength && tagData[i] ) { ++i; }
+                        ++i; // increment one more for null terminator
+                        continue;
+
+                    default :
+                        fprintf(stderr, "ERROR: Invalid tag value type\n"); // shouldn't get here
+                        exit(1);
+                }
+
+                // value would run past the end of the tag data
+                if ( tagDataLength - i < valueSize ) break;
+
+                if      ( valueSize == sizeof(uint16_t) ) SwapEndian_16p(&tagData[i]);
+                else if ( valueSize == sizeof(uint32_t) ) SwapEndian_32p(&tagData[i]);
+                else if ( valueSize == sizeof(uint64_t) ) SwapEndian_64p(&tagData[i]);
+                i += valueSize;
+            }
+        }
+
+        // store tagData in alignment
+        bAlignment.TagData.assign(tagData, tagDataLength);
+    }
+
+    // clear the core-only flag
+    bAlignment.SupportData.HasCoreOnly = false;
+
+    // return success
+    return true;
+}
+
+// Destroys the attached index object (if any) and records that no index is
+// available. Calling this with no index attached is harmless.
+void BamReaderPrivate::ClearIndex(void)
+{
+    delete Index;       // deleting a null pointer is a no-op
+    Index    = 0;
+    HasIndex = false;
+}
+
+// Closes the BAM file and resets the per-file state held by the reader:
+// the BGZF stream, the index, the header text and the current region.
+void BamReaderPrivate::Close(void)
+{
+    // shut down the compressed stream first
+    mBGZF.Close();
+
+    // drop any index that was loaded or built for this file
+    ClearIndex();
+
+    // forget the header text
+    HeaderText.clear();
+
+    // forget any region restriction
+    Region.clear();
+}
+
+// Builds a fresh index for the open BAM file and writes it to disk.
+// @useStandardIndex - true builds a BamStandardIndex (".bai"), false builds a
+//                     BamToolsIndex (".bti")
+// returns true only if both the build and the write succeeded
+//
+// The write is attempted even when the build reported failure. HasIndex
+// reflects the build result only.
+bool BamReaderPrivate::CreateIndex(bool useStandardIndex) {
+
+    // throw away whatever index was attached before
+    ClearIndex();
+
+    // instantiate the requested index flavor
+    if ( useStandardIndex ) {
+        Index = new BamStandardIndex(&mBGZF, Parent);
+    } else {
+        Index = new BamToolsIndex(&mBGZF, Parent);
+    }
+
+    // writing requires the complete index in memory
+    Index->SetCacheMode(BamIndex::FullIndexCaching);
+
+    // build the index; availability depends on the build alone
+    const bool built = Index->Build();
+    HasIndex = built;
+
+    // flag references that have no alignments
+    MarkReferences();
+
+    // try to save the index next to the BAM file
+    const bool written = Index->Write(Filename);
+
+    // restore the cache mode the client asked for
+    Index->SetCacheMode(IndexCacheMode);
+
+    return built && written;
+}
+
+// Returns a copy of the header text stored by LoadHeaderData().
+// The text is empty before a file has been opened and after Close().
+const string BamReaderPrivate::GetHeaderText(void) const
+{
+    return HeaderText;
+}
+
+// Retrieves the next alignment (restricted to the current region, if one is
+// set) and decodes its character data.
+// returns false when no further alignment is available or when the character
+// data could not be built
+bool BamReaderPrivate::GetNextAlignment(BamAlignment& bAlignment) {
+
+    // no valid alignment found
+    if ( !GetNextAlignmentCore(bAlignment) )
+        return false;
+
+    // alignment found: success now depends on parsing its char data
+    return BuildCharData(bAlignment);
+}
+
+// Retrieves the core data of the next available alignment (returns success/fail).
+// ** Character data (read name, bases, qualities, tag data) is NOT parsed;
+//    it remains available in raw form through the alignment's SupportData.
+// Useful when only positional or other alignment-level information is needed.
+//
+// When a region with a left bound is set, alignments lying before the region
+// are skipped; reading stops (returns false) at the first alignment that lies
+// after the region or when no further alignment can be loaded.
+bool BamReaderPrivate::GetNextAlignmentCore(BamAlignment& bAlignment) {
+
+    // a region is set, but it is already known to hold no alignments
+    if ( !Region.isNull() && !HasAlignmentsInRegion )
+        return false;
+
+    // no valid alignment available
+    if ( !LoadNextAlignment(bAlignment) )
+        return false;
+
+    // only core data has been filled in
+    bAlignment.SupportData.HasCoreOnly = true;
+
+    // without (at least) a left boundary, every alignment qualifies
+    if ( !Region.isLeftBoundSpecified() )
+        return true;
+
+    // classify the current alignment; keep loading while it lies before the region
+    for ( ;; ) {
+
+        const BamReaderPrivate::RegionState state = IsOverlap(bAlignment);
+
+        // alignment overlaps region
+        if ( state == WITHIN_REGION ) return true;
+
+        // alignment lies after region: nothing further can overlap
+        if ( state == AFTER_REGION ) return false;
+
+        // alignment lies before region: move on (failure here is likely EOF)
+        if ( !LoadNextAlignment(bAlignment) ) return false;
+    }
+}
+
+// returns RefID for given RefName (returns References.size() if not found)
+//
+// @refName - reference sequence name to look up (exact, case-sensitive match)
+// The ID is the zero-based position of the first matching entry in References.
+int BamReaderPrivate::GetReferenceID(const string& refName) const {
+
+    // scan the reference entries directly; no temporary copy of the names is needed
+    int refId = 0;
+    RefVector::const_iterator refIter = References.begin();
+    RefVector::const_iterator refEnd  = References.end();
+    for ( ; refIter != refEnd; ++refIter, ++refId ) {
+        if ( (*refIter).RefName == refName )
+            break;
+    }
+
+    // equals References.size() when the loop ran to completion without a match
+    return refId;
+}
+
+// Returns the region state of an alignment: whether it ends before, overlaps,
+// or starts after the currently specified region.
+// This *internal* method should ONLY be called when (at least) the region's
+// left bound is specified.
+BamReaderPrivate::RegionState BamReaderPrivate::IsOverlap(BamAlignment& bAlignment) {
+
+    const bool hasRightBound = Region.isRightBoundSpecified();
+
+    // alignment is on a reference sequence before the left bound reference
+    if ( bAlignment.RefID < Region.LeftRefID )
+        return BEFORE_REGION;
+
+    // alignment is on a reference sequence after the left bound reference
+    if ( bAlignment.RefID > Region.LeftRefID ) {
+
+        // open-ended region: everything from the left bound onwards is inside
+        if ( !hasRightBound )
+            return WITHIN_REGION;
+
+        // reference lies strictly between the two boundary references
+        if ( bAlignment.RefID < Region.RightRefID )
+            return WITHIN_REGION;
+
+        // reference lies past the right boundary reference
+        if ( bAlignment.RefID > Region.RightRefID )
+            return AFTER_REGION;
+
+        // on the right boundary reference: inside if it starts at or before the right boundary
+        return ( bAlignment.Position <= Region.RightPosition ? WITHIN_REGION : AFTER_REGION );
+    }
+
+    // from here on, the alignment is on the left bound reference
+
+    // alignment starts before the left boundary: inside only if it reaches the boundary
+    if ( bAlignment.Position < Region.LeftPosition )
+        return ( bAlignment.GetEndPosition() >= Region.LeftPosition ? WITHIN_REGION : BEFORE_REGION );
+
+    // alignment starts at or after the left boundary: it is past the region only
+    // if both boundaries share this reference and it starts beyond the right one
+    const bool startsPastRightBound = hasRightBound
+                                   && Region.LeftRefID == Region.RightRefID
+                                   && bAlignment.Position > Region.RightPosition;
+    return ( startsPastRightBound ? AFTER_REGION : WITHIN_REGION );
+}
+
+// load BAM header data
+//
+// Reads, in order, the 4-byte magic ("BAM\1"), the 4-byte header text length
+// and the header text itself from the current position of mBGZF, and stores
+// the text in HeaderText.
+//
+// A failed read of the magic or of the length, a wrong magic, or a failed
+// buffer allocation is fatal: a message is printed to stderr and the process
+// exits with status 1.
+void BamReaderPrivate::LoadHeaderData(void) {
+
+    // check to see if proper BAM header
+    char buffer[4];
+    if (mBGZF.Read(buffer, 4) != 4) {
+        fprintf(stderr, "Could not read header type\n");
+        exit(1);
+    }
+
+    if (strncmp(buffer, "BAM\001", 4)) {
+        fprintf(stderr, "wrong header type!\n");
+        exit(1);
+    }
+
+    // get BAM header text length
+    // (a short read would leave the magic bytes in 'buffer' and decode them as
+    //  a bogus length, so it is treated like the failures above)
+    if (mBGZF.Read(buffer, 4) != 4) {
+        fprintf(stderr, "Could not read header text length\n");
+        exit(1);
+    }
+    unsigned int headerTextLength = BgzfData::UnpackUnsignedInt(buffer);
+    if ( IsBigEndian ) SwapEndian_32(headerTextLength);
+
+    // get BAM header text
+    // buffer is zero-filled and one byte longer than the text, so it is always
+    // NUL-terminated; the size is widened first so that '+ 1' cannot wrap
+    char* headerText = (char*)calloc((size_t)headerTextLength + 1, 1);
+    if ( headerText == 0 ) {
+        fprintf(stderr, "Could not allocate header text buffer\n");
+        exit(1);
+    }
+    mBGZF.Read(headerText, headerTextLength);
+    HeaderText = (string)((const char*)headerText);
+
+    // clean up calloc-ed temp variable
+    free(headerText);
+}
+
+// Loads existing index data from a BAM index file (".bti" OR ".bai"); returns
+// success/fail.
+// @lookForIndex        - not consulted by this method
+// @preferStandardIndex - when no index filename is known, selects which index
+//                        type is preferred while searching next to the BAM file
+//
+// If IndexFilename is empty, an index is searched for based on Filename and,
+// when one is found, IndexFilename is set to Filename plus the index's
+// extension. Otherwise the client-supplied IndexFilename is used as is.
+bool BamReaderPrivate::LoadIndex(const bool lookForIndex, const bool preferStandardIndex) {
+
+    // clear out any existing index data
+    ClearIndex();
+
+    // no index filename provided: we have to go looking for one
+    const bool mustLocateIndex = IndexFilename.empty();
+
+    if ( mustLocateIndex ) {
+        // pick an index based on the BAM filename & the preferred type
+        const BamIndex::PreferredIndexType type = (preferStandardIndex ? BamIndex::STANDARD : BamIndex::BAMTOOLS);
+        Index = BamIndex::FromBamFilename(Filename, &mBGZF, Parent, type);
+    }
+    else {
+        // use the index filename provided by the client
+        Index = BamIndex::FromIndexFilename(IndexFilename, &mBGZF, Parent);
+    }
+
+    // no usable index object could be created
+    if ( Index == 0 )
+        return false;
+
+    // remember which file the located index lives in
+    if ( mustLocateIndex )
+        IndexFilename = Filename + Index->Extension();
+
+    // apply the client's cache mode, then read the index data from file
+    Index->SetCacheMode(IndexCacheMode);
+    HasIndex = Index->Load(IndexFilename);
+
+    // mark empty references
+    MarkReferences();
+
+    // return index status
+    return HasIndex;
+}
+
+// populates BamAlignment with alignment data under file pointer, returns success/fail
+//
+// Reads one record from mBGZF: the 4-byte block length, the fixed-size core
+// section (BAM_CORE_SIZE bytes) and the remaining character data. Fills the
+// core fields of bAlignment, stores the raw character data in
+// SupportData.AllCharData and decodes the CIGAR operations into CigarData.
+// Name, bases, qualities and tags are NOT decoded here (see BuildCharData).
+//
+// returns false at end of data, on any short read, on allocation failure, or
+// when the sizes declared by the record do not fit inside its block
+bool BamReaderPrivate::LoadNextAlignment(BamAlignment& bAlignment) {
+
+    // read in the 'block length' value
+    // a short read (e.g. at EOF) must not be decoded: 'buffer' would hold
+    // indeterminate bytes and yield a garbage length
+    char buffer[4];
+    if ( mBGZF.Read(buffer, 4) != 4 ) return false;
+    bAlignment.SupportData.BlockLength = BgzfData::UnpackUnsignedInt(buffer);
+    if ( IsBigEndian ) { SwapEndian_32(bAlignment.SupportData.BlockLength); }
+
+    // the block must at least hold the core section; this covers the
+    // zero-length case and keeps 'dataLength' below from wrapping around
+    if ( bAlignment.SupportData.BlockLength < (unsigned int)BAM_CORE_SIZE ) return false;
+
+    // read in core alignment data, make sure the right size of data was read
+    char x[BAM_CORE_SIZE];
+    if ( mBGZF.Read(x, BAM_CORE_SIZE) != BAM_CORE_SIZE ) return false;
+
+    if ( IsBigEndian ) {
+        for ( int i = 0; i < BAM_CORE_SIZE; i+=sizeof(uint32_t) )
+            SwapEndian_32p(&x[i]);
+    }
+
+    // set BamAlignment 'core' and 'support' data
+    bAlignment.RefID    = BgzfData::UnpackSignedInt(&x[0]);
+    bAlignment.Position = BgzfData::UnpackSignedInt(&x[4]);
+
+    // bin (upper 16 bits), mapping quality (next 8), read name length (low 8)
+    unsigned int tempValue = BgzfData::UnpackUnsignedInt(&x[8]);
+    bAlignment.Bin        = tempValue >> 16;
+    bAlignment.MapQuality = tempValue >> 8 & 0xff;
+    bAlignment.SupportData.QueryNameLength = tempValue & 0xff;
+
+    // alignment flag (upper 16 bits), number of CIGAR operations (low 16)
+    tempValue = BgzfData::UnpackUnsignedInt(&x[12]);
+    bAlignment.AlignmentFlag = tempValue >> 16;
+    bAlignment.SupportData.NumCigarOperations = tempValue & 0xffff;
+
+    bAlignment.SupportData.QuerySequenceLength = BgzfData::UnpackUnsignedInt(&x[16]);
+    bAlignment.MateRefID    = BgzfData::UnpackSignedInt(&x[20]);
+    bAlignment.MatePosition = BgzfData::UnpackSignedInt(&x[24]);
+    bAlignment.InsertSize   = BgzfData::UnpackSignedInt(&x[28]);
+
+    // set BamAlignment length
+    bAlignment.Length = bAlignment.SupportData.QuerySequenceLength;
+
+    // read in character data - make sure proper data size was read
+    // (one spare zeroed byte keeps the allocation from ever being zero-sized,
+    //  so a null result always means a real allocation failure)
+    const unsigned int dataLength = bAlignment.SupportData.BlockLength - BAM_CORE_SIZE;
+    char* allCharData = (char*)calloc((size_t)dataLength + 1, sizeof(char));
+    if ( allCharData == 0 ) return false;
+
+    bool readCharDataOK = false;
+    if ( mBGZF.Read(allCharData, dataLength) == (signed int)dataLength) {
+
+        // store 'allCharData' in supportData structure
+        bAlignment.SupportData.AllCharData.assign((const char*)allCharData, dataLength);
+
+        // save CIGAR ops
+        // need to calculate this here so that  BamAlignment::GetEndPosition() performs correctly,
+        // even when GetNextAlignmentCore() is called
+        const unsigned int cigarDataOffset = bAlignment.SupportData.QueryNameLength;
+        const unsigned int numCigarOps     = bAlignment.SupportData.NumCigarOperations;
+        const uint64_t     cigarDataEnd    = (uint64_t)cigarDataOffset + (uint64_t)numCigarOps * sizeof(uint32_t);
+
+        bAlignment.CigarData.clear();
+
+        // the read name and CIGAR data must lie inside what was actually read
+        if ( cigarDataEnd <= dataLength ) {
+
+            // set success flag
+            readCharDataOK = true;
+
+            // number of op codes CIGAR_LOOKUP can translate
+            const size_t numKnownOps = strlen(CIGAR_LOOKUP);
+
+            CigarOp op;
+            bAlignment.CigarData.reserve(numCigarOps);
+            for (unsigned int i = 0; i < numCigarOps; ++i) {
+
+                // copy the packed op out byte-wise: the CIGAR data follows the
+                // read name and need not be 4-byte aligned
+                uint32_t packedOp;
+                memcpy(&packedOp, allCharData + cigarDataOffset + i*sizeof(uint32_t), sizeof(uint32_t));
+
+                // swap if necessary
+                if ( IsBigEndian ) SwapEndian_32(packedOp);
+
+                // build CigarOp structure
+                // op codes beyond the lookup table map to '\0' instead of
+                // indexing past the end of the table
+                const uint32_t opCode = (packedOp & BAM_CIGAR_MASK);
+                op.Length = (packedOp >> BAM_CIGAR_SHIFT);
+                op.Type   = ( opCode < numKnownOps ? CIGAR_LOOKUP[opCode] : '\0' );
+
+                // save CigarOp
+                bAlignment.CigarData.push_back(op);
+            }
+        }
+    }
+
+    free(allCharData);
+    return readCharDataOK;
+}
+
+// loads reference data from BAM file
+//
+// Reads the number of reference sequences and then, for each one, its name
+// length, name and sequence length from the current position of mBGZF,
+// appending one RefData entry per reference to References.
+//
+// Loading stops early (keeping the entries read so far) if the stream runs
+// short or a name buffer cannot be allocated.
+void BamReaderPrivate::LoadReferenceData(void) {
+
+    // start from an empty list: Close() does not reset References, so
+    // re-opening a reader would otherwise append to the previous file's entries
+    References.clear();
+
+    // get number of reference sequences
+    char buffer[4];
+    if ( mBGZF.Read(buffer, 4) != 4 ) return;
+    unsigned int numberRefSeqs = BgzfData::UnpackUnsignedInt(buffer);
+    if ( IsBigEndian ) SwapEndian_32(numberRefSeqs);
+    if ( numberRefSeqs == 0 ) return;
+    References.reserve((int)numberRefSeqs);
+
+    // iterate over all references in header
+    for (unsigned int i = 0; i != numberRefSeqs; ++i) {
+
+        // get length of reference name
+        if ( mBGZF.Read(buffer, 4) != 4 ) return;
+        unsigned int refNameLength = BgzfData::UnpackUnsignedInt(buffer);
+        if ( IsBigEndian ) SwapEndian_32(refNameLength);
+
+        // zero-filled and one byte longer than the stored name, so the buffer
+        // is NUL-terminated even if the file omits the terminator, and the
+        // allocation is never zero-sized
+        char* refName = (char*)calloc((size_t)refNameLength + 1, 1);
+        if ( refName == 0 ) return;
+
+        // get reference name and reference sequence length
+        const bool nameRead   = ( mBGZF.Read(refName, refNameLength) == (signed int)refNameLength );
+        const bool lengthRead = ( nameRead && mBGZF.Read(buffer, 4) == 4 );
+        if ( !lengthRead ) {
+            free(refName);
+            return;
+        }
+        int refLength = BgzfData::UnpackSignedInt(buffer);
+        if ( IsBigEndian ) SwapEndian_32(refLength);
+
+        // store data for reference
+        RefData aReference;
+        aReference.RefName   = (string)((const char*)refName);
+        aReference.RefLength = refLength;
+        References.push_back(aReference);
+
+        // clean up calloc-ed temp variable
+        free(refName);
+    }
+}
+
+// Records, for every known reference, whether the index reports alignments on
+// it (stored in RefHasAlignments). Does nothing when no index is available.
+void BamReaderPrivate::MarkReferences(void) {
+
+    // ensure index is available
+    if ( !HasIndex )
+        return;
+
+    // ask the index about each reference in turn
+    const int numReferences = (int)References.size();
+    for ( int refId = 0; refId < numReferences; ++refId ) {
+        RefData& reference = References.at(refId);
+        reference.RefHasAlignments = Index->HasAlignments(refId);
+    }
+}
+
+// Opens a BAM file and, depending on the arguments, its index.
+// @filename            - BAM file to open
+// @indexFilename       - index file to load; may be empty
+// @lookForIndex        - when no index filename is given, whether an index
+//                        should be searched for next to the BAM file
+// @preferStandardIndex - passed on to LoadIndex()
+// returns false if the BAM file cannot be opened or if a wanted index cannot
+// be loaded; true otherwise
+bool BamReaderPrivate::Open(const string& filename, const string& indexFilename, const bool lookForIndex, const bool preferStandardIndex) {
+
+    // remember both filenames
+    Filename      = filename;
+    IndexFilename = indexFilename;
+
+    // open the BGZF file for reading
+    if ( !mBGZF.Open(filename, "rb") )
+        return false;
+
+    // header text and reference data precede the alignments
+    LoadHeaderData();
+    LoadReferenceData();
+
+    // the stream now sits on the first alignment
+    AlignmentsBeginOffset = mBGZF.Tell();
+
+    // an index is wanted if the client named one, or asked for one to be found
+    // (sequential-only access needs neither)
+    const bool indexWanted = ( lookForIndex || !IndexFilename.empty() );
+    if ( !indexWanted )
+        return true;
+
+    // success now depends on loading the index
+    return LoadIndex(lookForIndex, preferStandardIndex);
+}
+
+// Moves the BAM file pointer back to the first alignment and drops any region
+// restriction.
+// returns false if the seek fails or if no alignment can be read from the
+// start of the alignment data
+bool BamReaderPrivate::Rewind(void) {
+
+    // jump to the first alignment
+    if ( !mBGZF.Seek(AlignmentsBeginOffset) )
+        return false;
+
+    // probe: an alignment must be readable here (its contents are not used)
+    BamAlignment firstAlignment;
+    const bool firstAlignmentLoaded = LoadNextAlignment(firstAlignment);
+    if ( !firstAlignmentLoaded )
+        return false;
+
+    // back to the default, unrestricted state
+    Region.clear();
+    HasAlignmentsInRegion = true;
+
+    // the probe consumed one alignment, so seek back again;
+    // the result of this seek is the result of the rewind
+    return mBGZF.Seek(AlignmentsBeginOffset);
+}
+
+// Changes the index caching behavior.
+// The mode is always remembered so it can be applied to indexes attached
+// later; it is applied immediately only if an index object exists now.
+void BamReaderPrivate::SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode) {
+    IndexCacheMode = mode;
+    if ( Index != 0 )
+        Index->SetCacheMode(mode);
+}
+
+// Restricts subsequent reads to the given region by asking the index to
+// Jump() there.
+// returns false if no index is available or if the jump could not be
+// performed at all (invalid index, unknown reference, etc); true otherwise,
+// including the case where the region is known to contain no alignments
+bool BamReaderPrivate::SetRegion(const BamRegion& region) {
+
+    // Clear out any prior region first.
+    //
+    // N.B. - with no Region set, GetNextAlignmentCore() simply returns the next
+    // alignment without doing any overlap checking of its own. Clearing here
+    // gives BamIndex free rein to call GetNextAlignmentCore() and do its own
+    // overlap checking, all without exposing LoadNextAlignment() to the public
+    // API (and potentially confusing clients with the nomenclature).
+    Region.clear();
+
+    // random access is impossible without an index
+    if ( !HasIndex )
+        return false;
+
+    // work on a copy, moved forward if necessary to where data actually begins
+    BamRegion target(region);
+    AdjustRegion(target);
+
+    // Jump only if some reference in the region has data. Having none is not an
+    // error: the reader just knows nothing is there for later alignment access
+    // (useful in a MultiBamReader setting where some BAM files lack data in
+    // regions that other BAMs cover).
+    //
+    // Index::Jump() is allowed to modify the HasAlignmentsInRegion flag
+    //  * This covers the case where the requested region lies beyond the last
+    //    alignment on a reference. Subsequent calls to GetNextAlignment[Core]
+    //    then simply return false, so BamMultiReader can still pull alignments
+    //    for a region from multiple files even if one or more have no data.
+    const bool jumpFailed = ( HasAlignmentsInRegion && !Index->Jump(target, &HasAlignmentsInRegion) );
+    if ( jumpFailed )
+        return false;
+
+    // save region and return success
+    Region = target;
+    return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamReader_p.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,137 @@
+// ***************************************************************************
+// BamReader_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for reading BAM files
+// ***************************************************************************
+
+#ifndef BAMREADER_P_H
+#define BAMREADER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail. This header file may change from version to version
+// without notice, or even be removed.
+//
+// We mean it.
+
+#include <BamAlignment.h>
+#include <BamIndex.h>
+#include <BGZF.h>
+#include <string>
+
+namespace BamTools {
+
+class BamReader;
+
+namespace Internal {
+
+// Private implementation behind BamReader: holds the BGZF stream, header
+// text, reference list, optional index and current region, and provides the
+// routines that read alignments from the stream.
+class BamReaderPrivate {
+
+    // enums
+    // position of an alignment relative to the current region (see IsOverlap)
+    public: enum RegionState { BEFORE_REGION = 0
+			     , WITHIN_REGION
+			     , AFTER_REGION
+			     };
+
+    // ctor & dtor
+    public:
+	BamReaderPrivate(BamReader* parent);
+	~BamReaderPrivate(void);
+
+    // 'public' interface to BamReader
+    public:
+
+	// file operations
+	void Close(void);
+	bool Open(const std::string& filename,
+		  const std::string& indexFilename,
+		  const bool lookForIndex,
+		  const bool preferStandardIndex);
+	bool Rewind(void);
+	bool SetRegion(const BamRegion& region);
+
+	// access alignment data
+	// (the 'Core' variant skips decoding of name, bases, qualities and tags)
+	bool GetNextAlignment(BamAlignment& bAlignment);
+	bool GetNextAlignmentCore(BamAlignment& bAlignment);
+
+	// access auxiliary data
+	const std::string GetHeaderText(void) const;
+	// returns References.size() when the name is not found
+	int GetReferenceID(const std::string& refName) const;
+
+	// index operations
+	bool CreateIndex(bool useStandardIndex);
+	void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);
+
+    // 'internal' methods
+    public:
+
+	// ---------------------------------------
+	// reading alignments and auxiliary data
+
+	// adjusts requested region if necessary (depending on where data actually begins)
+	void AdjustRegion(BamRegion& region);
+	// fills out character data for BamAlignment data
+	bool BuildCharData(BamAlignment& bAlignment);
+	// checks to see if alignment overlaps current region
+	RegionState IsOverlap(BamAlignment& bAlignment);
+	// retrieves header text from BAM file
+	void LoadHeaderData(void);
+	// retrieves BAM alignment under file pointer
+	bool LoadNextAlignment(BamAlignment& bAlignment);
+	// builds reference data structure from BAM file
+	void LoadReferenceData(void);
+	// mark references with 'HasAlignments' status
+	void MarkReferences(void);
+
+	// ---------------------------------
+	// index file handling
+
+	// clear out internal index data structure
+	void ClearIndex(void);
+	// loads index from BAM index file
+	bool LoadIndex(const bool lookForIndex, const bool preferStandardIndex);
+
+    // data members
+    // N.B. - declaration order is also initialization order; keep the
+    // constructor's initializer list in step when reordering
+    public:
+
+	// general file data
+	BgzfData  mBGZF;
+	std::string HeaderText;
+	BamIndex* Index;
+	RefVector References;
+	bool      HasIndex;
+	int64_t   AlignmentsBeginOffset;
+	std::string    Filename;
+	std::string    IndexFilename;
+
+//	Internal::BamHeader* m_header;
+
+	// index caching mode
+	BamIndex::BamIndexCacheMode IndexCacheMode;
+
+	// system data
+	bool IsBigEndian;
+
+	// user-specified region values
+	BamRegion Region;
+	bool HasAlignmentsInRegion;
+
+	// parent BamReader
+	BamReader* Parent;
+
+	// BAM character constants
+	// (lookup tables indexed by packed base code / CIGAR op code)
+	const char* DNA_LOOKUP;
+	const char* CIGAR_LOOKUP;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMREADER_P_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamStandardIndex_p.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,910 @@
+// ***************************************************************************
+// BamStandardIndex.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 22 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#include <BamAlignment.h>
+#include <BamReader.h>
+#include <BGZF.h>
+#include <BamStandardIndex_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+#include <iostream>
+#include <map>
+using namespace std;
+
+// Constructs a standard (".bai") index object. @bgzf and @reader are forwarded
+// to the BamIndex base; the data-begin offset starts at 0 and the full-cache
+// flag starts cleared.
+BamStandardIndex::BamStandardIndex(BgzfData* bgzf, BamReader* reader)
+    : BamIndex(bgzf, reader),
+      m_dataBeginOffset(0),
+      m_hasFullDataCache(false)
+{
+    // host endianness is queried once, at construction time
+    m_isBigEndian = BamTools::SystemIsBigEndian();
+}
+
+// Clears the cached bins and linear offsets of every reference on destruction.
+BamStandardIndex::~BamStandardIndex(void)
+{
+    ClearAllData();
+}
+
+// Computes the ids of all bins that may hold alignments overlapping @region.
+//
+// @region                - region of interest; LeftRefID and LeftPosition are
+//                          always used
+// @isRightBoundSpecified - if true AND both bounds lie on the same reference,
+//                          RightPosition closes the interval; otherwise the
+//                          last base of the left reference (RefLength - 1) does
+// @bins                  - output array with room for MAX_BIN entries
+//
+// Returns the number of entries written to @bins (always >= 1, because bin 0
+// is always stored).
+int BamStandardIndex::BinsFromRegion(const BamRegion& region,
+                                     const bool isRightBoundSpecified,
+                                     uint16_t bins[MAX_BIN])
+{
+    // largest coordinate the five levels below can address: the finest level
+    // shifts by 14 and has 32768 bins, i.e. 2^29 positions
+    const uint32_t maxPosition = (1u << 29) - 1;
+
+    // get region boundaries
+    uint32_t begin = (unsigned int)region.LeftPosition;
+    uint32_t end;
+
+    // right bound is only usable when it sits on the same reference as the left
+    if ( isRightBoundSpecified && ( region.LeftRefID == region.RightRefID ) )
+        end = (unsigned int)region.RightPosition;
+
+    // otherwise, use end of left bound reference as cutoff
+    else
+        end = (unsigned int)m_references.at(region.LeftRefID).RefLength - 1;
+
+    // clamp both bounds: an out-of-range value (a negative position or a zero
+    // RefLength wrapped by the unsigned casts above, or a reference longer than
+    // 2^29) would otherwise generate more than MAX_BIN ids and write past the
+    // end of @bins
+    if ( begin > maxPosition ) begin = maxPosition;
+    if ( end   > maxPosition ) end   = maxPosition;
+
+    // initialize list, bin '0' always a valid bin
+    int i = 0;
+    bins[i++] = 0;
+
+    // one pass per level, coarsest to finest
+    unsigned int k;
+    for (k =    1 + (begin>>26); k <=    1 + (end>>26); ++k) { bins[i++] = k; }
+    for (k =    9 + (begin>>23); k <=    9 + (end>>23); ++k) { bins[i++] = k; }
+    for (k =   73 + (begin>>20); k <=   73 + (end>>20); ++k) { bins[i++] = k; }
+    for (k =  585 + (begin>>17); k <=  585 + (end>>17); ++k) { bins[i++] = k; }
+    for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { bins[i++] = k; }
+
+    // return number of bins stored
+    return i;
+}
+
+// Builds the index in memory by scanning every alignment reachable through
+// m_reader, starting from the position the reader rewinds to.
+//
+// While scanning, the loop
+//   - terminates the process with exit(1) if positions decrease within one
+//     reference, or if the BGZF file pointer does not advance between reads;
+//   - records linear offsets for alignments with RefID >= 0 and Bin < 4681;
+//   - each time the bin id changes, stores a Chunk(saveOffset, lastOffset)
+//     for the bin that just ended.
+// Afterwards chunks are merged and each linear offset vector is sorted.
+//
+// Returns false if the reader or BGZF file is unavailable, or if a reference
+// id seen in the data has no entry in m_indexData; otherwise returns the
+// result of the final m_reader->Rewind().
+bool BamStandardIndex::Build(void) {
+
+    // be sure reader & BGZF file are valid & open for reading
+    if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
+	return false;
+
+    // move file pointer to beginning of alignments
+    // (the result of this first Rewind() is not checked)
+    m_reader->Rewind();
+
+    // get reference count, reserve index space
+    const int numReferences = (int)m_references.size();
+    m_indexData.clear();
+    m_hasFullDataCache = false;
+    SetReferenceCount(numReferences);
+
+    // sets default constant for bin, ID, offset, coordinate variables
+    const uint32_t defaultValue = 0xffffffffu;
+
+    // bin data
+    uint32_t saveBin(defaultValue);
+    uint32_t lastBin(defaultValue);
+
+    // reference ID data
+    // NOTE(review): these signed variables are initialised from 0xffffffff and
+    // are later tested with '< 0' / '>= 0' - presumably this yields -1; confirm
+    int32_t saveRefID(defaultValue);
+    int32_t lastRefID(defaultValue);
+
+    // offset data
+    uint64_t saveOffset = m_BGZF->Tell();
+    uint64_t lastOffset = saveOffset;
+
+    // coordinate data
+    int32_t lastCoordinate = defaultValue;
+
+    BamAlignment bAlignment;
+    while ( m_reader->GetNextAlignmentCore(bAlignment) ) {
+
+	// change of chromosome, save ID, reset bin
+	if ( lastRefID != bAlignment.RefID ) {
+	    lastRefID = bAlignment.RefID;
+	    lastBin   = defaultValue;
+	}
+
+	// if lastCoordinate greater than BAM position - file not sorted properly
+	// NOTE(review): this ends the whole process rather than returning false
+	else if ( lastCoordinate > bAlignment.Position ) {
+	    fprintf(stderr, "BAM file not properly sorted:\n");
+	    fprintf(stderr, "Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(),
+		    lastCoordinate, bAlignment.Position, bAlignment.RefID);
+	    exit(1);
+	}
+
+	// if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
+	if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
+
+	    // save linear offset entry (matched to BAM entry refID)
+	    BamStandardIndexData::iterator indexIter = m_indexData.find(bAlignment.RefID);
+	    if ( indexIter == m_indexData.end() ) return false; // error
+	    ReferenceIndex& refIndex = (*indexIter).second;
+	    LinearOffsetVector& offsets = refIndex.Offsets;
+	    SaveLinearOffset(offsets, bAlignment, lastOffset);
+	}
+
+	// if current BamAlignment bin != lastBin, "then possibly write the binning index"
+	if ( bAlignment.Bin != lastBin ) {
+
+	    // if not first time through
+	    if ( saveBin != defaultValue ) {
+
+		// save Bam bin entry
+		BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
+		if ( indexIter == m_indexData.end() ) return false; // error
+		ReferenceIndex& refIndex = (*indexIter).second;
+		BamBinMap& binMap = refIndex.Bins;
+		SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
+	    }
+
+	    // update saveOffset
+	    saveOffset = lastOffset;
+
+	    // update bin values
+	    saveBin = bAlignment.Bin;
+	    lastBin = bAlignment.Bin;
+
+	    // update saveRefID
+	    saveRefID = bAlignment.RefID;
+
+	    // if invalid RefID, break out
+	    if ( saveRefID < 0 ) break;
+	}
+
+	// make sure that current file pointer is beyond lastOffset
+	if ( m_BGZF->Tell() <= (int64_t)lastOffset ) {
+	    fprintf(stderr, "Error in BGZF offsets.\n");
+	    exit(1);
+	}
+
+	// update lastOffset
+	lastOffset = m_BGZF->Tell();
+
+	// update lastCoordinate
+	lastCoordinate = bAlignment.Position;
+    }
+
+    // save any leftover BAM data (as long as refID is valid)
+    if ( saveRefID >= 0 ) {
+	// save Bam bin entry
+	BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
+	if ( indexIter == m_indexData.end() ) return false; // error
+	ReferenceIndex& refIndex = (*indexIter).second;
+	BamBinMap& binMap = refIndex.Bins;
+	SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
+    }
+
+    // simplify index by merging chunks
+    MergeChunks();
+
+    // iterate through references in index
+    // sort offsets in linear offset vector
+    // (the counter 'i' below is incremented but never read)
+    BamStandardIndexData::iterator indexIter = m_indexData.begin();
+    BamStandardIndexData::iterator indexEnd  = m_indexData.end();
+    for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
+
+	// get reference index data
+	ReferenceIndex& refIndex = (*indexIter).second;
+	LinearOffsetVector& offsets = refIndex.Offsets;
+
+	// sort linear offsets
+	sort(offsets.begin(), offsets.end());
+    }
+
+    // rewind file pointer to beginning of alignments, return success/fail
+    return m_reader->Rewind();
+}
+
+// Reads 4 bytes from the current position of the index stream and checks them
+// against the expected magic number "BAI\1".
+//
+// Returns true only if all 4 bytes were read and match. On any failure an
+// error message is printed to stderr, the index stream is closed, and false
+// is returned.
+bool BamStandardIndex::CheckMagicNumber(void) {
+
+    // zero-filled so a short read is never compared as uninitialized memory
+    char magic[4] = { 0, 0, 0, 0 };
+    const size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
+
+    // a short read is treated exactly like a wrong signature
+    if ( (elementsRead != 4) || (strncmp(magic, "BAI\1", 4) != 0) ) {
+        fprintf(stderr, "Problem with index file - invalid format.\n");
+        // NOTE(review): m_indexStream still holds the closed handle after this
+        fclose(m_indexStream);
+        return false;
+    }
+
+    return true;
+}
+
+// Empties the cached bins and linear offsets of every reference in the index.
+// The per-reference entries themselves stay in m_indexData.
+void BamStandardIndex::ClearAllData(void) {
+    BamStandardIndexData::const_iterator entry = m_indexData.begin();
+    while ( entry != m_indexData.end() ) {
+        ClearReferenceOffsets( entry->first );
+        ++entry;
+    }
+}
+
+// Empties the bin map and linear offsets cached for reference @refId and
+// clears the full-cache flag. Does nothing if @refId is not in the index.
+void BamStandardIndex::ClearReferenceOffsets(const int& refId) {
+
+    BamStandardIndexData::iterator found = m_indexData.find(refId);
+    if ( found != m_indexData.end() ) {
+
+        ReferenceIndex& entry = found->second;
+        entry.Bins.clear();
+        entry.Offsets.clear();
+
+        // at least one reference is now missing its data
+        m_hasFullDataCache = false;
+    }
+}
+
+// Returns the stored index-file position recorded right after the magic
+// number was read or written.
+const off_t BamStandardIndex::DataBeginOffset(void) const
+{
+    return this->m_dataBeginOffset;
+}
+
+// Collects the file offsets from which a scan for alignments overlapping
+// @region should start.
+//
+// @region                - region of interest; index data is looked up for
+//                          region.LeftRefID only
+// @isRightBoundSpecified - forwarded to BinsFromRegion()
+// @offsets               - Start offsets of candidate chunks are appended to
+//                          this vector, which is then sorted
+// @hasAlignmentsInRegion - set to true iff @offsets is non-empty on return
+//
+// Returns false if the left reference is not in the index or its data could
+// not be loaded from the index file; true otherwise.
+bool BamStandardIndex::GetOffsets(const BamRegion& region,
+                                  const bool isRightBoundSpecified,
+                                  vector<int64_t>& offsets,
+                                  bool* hasAlignmentsInRegion)
+{
+    // return false if leftBound refID is not found in index data
+    if ( m_indexData.find(region.LeftRefID) == m_indexData.end() )
+        return false;
+
+    // load index data for region if not already cached
+    if ( !IsDataLoaded(region.LeftRefID) ) {
+        bool loadedOk = true;
+        loadedOk &= SkipToReference(region.LeftRefID);
+        loadedOk &= LoadReference(region.LeftRefID);
+        if ( !loadedOk ) return false;
+    }
+
+    // calculate which bins overlap this region; the buffer is owned by a
+    // vector so it is released on every exit path (early return or exception)
+    vector<uint16_t> bins(MAX_BIN, 0);
+    const int numBins = BinsFromRegion(region, isRightBoundSpecified, &bins[0]);
+
+    // get bins for this reference
+    BamStandardIndexData::const_iterator indexIter = m_indexData.find(region.LeftRefID);
+    if ( indexIter == m_indexData.end() ) return false; // error
+    const ReferenceIndex& refIndex = (*indexIter).second;
+    const BamBinMap& binMap        = refIndex.Bins;
+
+    // get minimum offset to consider: the linear offset of the window holding
+    // the left position, or 0 when that window lies beyond the stored offsets
+    const LinearOffsetVector& linearOffsets = refIndex.Offsets;
+    const uint64_t minOffset = ( (unsigned int)(region.LeftPosition>>BAM_LIDX_SHIFT) >= linearOffsets.size() )
+                               ? 0 : linearOffsets.at(region.LeftPosition>>BAM_LIDX_SHIFT);
+
+    // store all alignment 'chunk' starts (file offsets) for bins in this region
+    for ( int i = 0; i < numBins; ++i ) {
+
+        const uint16_t binKey = bins[i];
+        BamBinMap::const_iterator binIter = binMap.find(binKey);
+        if ( binIter == binMap.end() ) continue;
+
+        // keep every chunk that ends after the minimum offset
+        const ChunkVector& chunks = (*binIter).second;
+        ChunkVector::const_iterator chunksIter = chunks.begin();
+        ChunkVector::const_iterator chunksEnd  = chunks.end();
+        for ( ; chunksIter != chunksEnd; ++chunksIter ) {
+            const Chunk& chunk = (*chunksIter);
+            if ( chunk.Stop > minOffset )
+                offsets.push_back( chunk.Start );
+        }
+    }
+
+    // sort the offsets before returning
+    sort(offsets.begin(), offsets.end());
+
+    // set flag
+    *hasAlignmentsInRegion = ( offsets.size() != 0 );
+
+    // if cache mode set to none, dump the data we just loaded
+    if ( m_cacheMode == BamIndex::NoIndexCaching )
+        ClearReferenceOffsets(region.LeftRefID);
+
+    // return success
+    return true;
+}
+
+// Reports the HasAlignments flag stored for reference @refId.
+// Returns false when @refId is not present in the index.
+bool BamStandardIndex::HasAlignments(const int& refId) const {
+    const BamStandardIndexData::const_iterator found = m_indexData.find(refId);
+    return ( found != m_indexData.end() ) && found->second.HasAlignments;
+}
+
+// Returns the flag that records whether data for all references is cached.
+bool BamStandardIndex::HasFullDataCache(void) const
+{
+    return this->m_hasFullDataCache;
+}
+
+// Tells whether usable index data is cached for reference @refId.
+//
+// Returns false if @refId is unknown. A reference flagged as having no
+// alignments counts as loaded (there is nothing to load); otherwise the
+// answer is whether its bin map is non-empty.
+bool BamStandardIndex::IsDataLoaded(const int& refId) const {
+
+    const BamStandardIndexData::const_iterator found = m_indexData.find(refId);
+    if ( found == m_indexData.end() )
+        return false;
+
+    const ReferenceIndex& entry = found->second;
+    return ( !entry.HasAlignments || !entry.Bins.empty() );
+}
+
+// Attempts to position the BGZF file pointer for reading alignments that
+// overlap @region, using the offsets produced by GetOffsets().
+//
+// @region                - target region
+// @hasAlignmentsInRegion - output flag; set by GetOffsets(), then overwritten
+//                          with each GetNextAlignmentCore() result; forced to
+//                          false on failure
+//
+// Returns false if the reader/BGZF file is unavailable, if the left position
+// is greater than the reference length, or if offsets cannot be calculated.
+// When a suitable alignment is found, returns the result of the final Seek();
+// otherwise returns the AND of all Seek() results from the loop.
+bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // be sure reader & BGZF file are valid & open for reading
+    if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
+	return false;
+
+    // make sure left-bound position is valid
+    // NOTE(review): region.LeftRefID is not range-checked before .at() - confirm
+    // callers guarantee a valid id
+    if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength )
+	return false;
+
+    // calculate offsets for this region
+    // if failed, print message, set flag, and return failure
+    vector<int64_t> offsets;
+    if ( !GetOffsets(region, region.isRightBoundSpecified(), offsets, hasAlignmentsInRegion) ) {
+	fprintf(stderr, "ERROR: Could not jump: unable to calculate offset(s) for specified region.\n");
+	*hasAlignmentsInRegion = false;
+	return false;
+    }
+
+    // iterate through offsets
+    BamAlignment bAlignment;
+    bool result = true;
+    for ( vector<int64_t>::const_iterator o = offsets.begin(); o != offsets.end(); ++o) {
+
+	// attempt seek & load first available alignment
+	// set flag to true if data exists
+	// NOTE(review): the read result is stored in the flag but not checked
+	// before bAlignment is inspected below
+	result &= m_BGZF->Seek(*o);
+	*hasAlignmentsInRegion = m_reader->GetNextAlignmentCore(bAlignment);
+
+	// if this alignment corresponds to desired position
+	// return success of seeking back to the offset before the 'current offset' (to cover overlaps)
+	if ( ((bAlignment.RefID == region.LeftRefID) &&
+	       ((bAlignment.Position + bAlignment.Length) > region.LeftPosition)) ||
+	     (bAlignment.RefID > region.LeftRefID) )
+	{
+	    if ( o != offsets.begin() ) --o;
+	    return m_BGZF->Seek(*o);
+	}
+    }
+
+    // reached when no offset produced a suitable alignment (or none existed);
+    // the file pointer is left wherever the last read in the loop put it
+
+    // if error in jumping, print message & set flag
+    if ( !result ) {
+	fprintf(stderr, "ERROR: Could not jump: unable to determine correct offset for specified region.\n");
+	*hasAlignmentsInRegion = false;
+    }
+
+    // return success/failure
+    return result;
+}
+
+// Clears cached index data for every reference except the one with the
+// smallest id currently in the index. Does nothing when the index is empty.
+void BamStandardIndex::KeepOnlyFirstReferenceOffsets(void) {
+
+    // begin() must not be dereferenced on an empty container
+    if ( m_indexData.empty() ) return;
+
+    BamStandardIndexData::const_iterator indexBegin = m_indexData.begin();
+    KeepOnlyReferenceOffsets((*indexBegin).first);
+}
+
+// Clears cached index data for every reference whose id differs from @refId.
+void BamStandardIndex::KeepOnlyReferenceOffsets(const int& refId) {
+    for ( BamStandardIndexData::iterator entry = m_indexData.begin();
+          entry != m_indexData.end();
+          ++entry )
+    {
+        const int candidate = entry->first;
+        if ( candidate == refId ) continue;
+        ClearReferenceOffsets(candidate);
+    }
+}
+
+// Loads index data for every reference listed in the index file.
+// @saveData - keep loaded data in memory if true, just read & discard if false
+//
+// Returns true immediately if a full cache is already present. Otherwise
+// returns true only if the reference count and every reference loaded OK;
+// the full-cache flag is set only when that succeeded with @saveData true.
+bool BamStandardIndex::LoadAllReferences(bool saveData) {
+
+    // nothing to do when everything is cached already
+    if ( m_hasFullDataCache )
+        return true;
+
+    // how many references does the file describe?
+    int referenceCount = 0;
+    if ( !LoadReferenceCount(referenceCount) )
+        return false;
+
+    // read them all, remembering whether any one failed
+    bool allLoaded = true;
+    int refId = 0;
+    while ( refId < referenceCount ) {
+        if ( !LoadReference(refId, saveData) )
+            allLoaded = false;
+        ++refId;
+    }
+
+    if ( allLoaded && saveData )
+        m_hasFullDataCache = true;
+
+    return allLoaded;
+}
+
+// Loads the index file header (the magic number) and, on success, records
+// the stream position that follows it as the beginning of index data.
+// Returns true if the magic number was read and is valid.
+bool BamStandardIndex::LoadHeader(void) {
+
+    const bool loadedOk = CheckMagicNumber();
+
+    // CheckMagicNumber() closes the index stream when it fails, so the stream
+    // may only be queried after a successful check
+    if ( loadedOk )
+        m_dataBeginOffset = ftell64(m_indexStream);
+
+    // return success/failure of load
+    return loadedOk;
+}
+
+// Loads a single bin entry (bin id followed by its chunks) from the index file.
+// @refEntry - reference entry that receives the bin when @saveData is true
+// @saveData - save data in memory if true, just read & discard if false
+// Returns true if both the bin id and its chunks were read OK.
+bool BamStandardIndex::LoadBin(ReferenceIndex& refEntry, bool saveData) {
+
+    // get bin ID; initialized so a failed read never leaves it indeterminate
+    uint32_t binId = 0;
+    const bool idOk = ( fread(&binId, sizeof(binId), 1, m_indexStream) == 1 );
+    if ( idOk && m_isBigEndian ) SwapEndian_32(binId);
+
+    // load alignment chunks for this bin
+    ChunkVector chunks;
+    const bool chunksOk = LoadChunks(chunks, saveData);
+
+    // store bin entry only when everything about it was read successfully
+    if ( idOk && chunksOk && saveData )
+        refEntry.Bins.insert(pair<uint32_t, ChunkVector>(binId, chunks));
+
+    // return success/failure of load
+    return ( idOk && chunksOk );
+}
+
+// Loads all bin entries of one reference from the index file.
+// @refEntry - reference entry to fill; its HasAlignments flag is set to
+//             whether the stored bin count is non-zero
+// @saveData - save data in memory if true, just read & discard if false
+// Returns true if the bin count and every bin were read OK. If the count
+// itself cannot be read, @refEntry is left untouched.
+bool BamStandardIndex::LoadBins(ReferenceIndex& refEntry, bool saveData) {
+
+    // get number of bins; bail out before using it if the read failed, since
+    // an indeterminate count would otherwise drive the loop below
+    int32_t numBins = 0;
+    if ( fread(&numBins, sizeof(numBins), 1, m_indexStream) != 1 )
+        return false;
+    if ( m_isBigEndian ) SwapEndian_32(numBins);
+
+    // set flag
+    refEntry.HasAlignments = ( numBins != 0 );
+
+    // iterate over bins, stopping at the first one that cannot be read
+    bool binsOk = true;
+    for ( int i = 0; binsOk && i < numBins; ++i )
+        binsOk = LoadBin(refEntry, saveData);
+
+    // return success/failure of load
+    return binsOk;
+}
+
+// Loads a single chunk (a start/stop pair of 64-bit values) from the index file.
+// @chunks   - vector the chunk is appended to when @saveData is true
+// @saveData - save data in memory if true, just read & discard if false
+// Returns true if both values were read OK; nothing is appended otherwise.
+bool BamStandardIndex::LoadChunk(ChunkVector& chunks, bool saveData) {
+
+    // read in chunk data (both reads are always attempted)
+    uint64_t start = 0;
+    uint64_t stop  = 0;
+    size_t elementsRead = 0;
+    elementsRead += fread(&start, sizeof(start), 1, m_indexStream);
+    elementsRead += fread(&stop,  sizeof(stop),  1, m_indexStream);
+
+    // a partially read chunk must not reach the index
+    if ( elementsRead != 2 )
+        return false;
+
+    // swap endian-ness if necessary
+    if ( m_isBigEndian ) {
+        SwapEndian_64(start);
+        SwapEndian_64(stop);
+    }
+
+    // save data if requested
+    if ( saveData ) chunks.push_back( Chunk(start, stop) );
+
+    return true;
+}
+
+// Loads the chunk list of one bin (a count followed by that many chunks).
+// @chunks   - receives the chunks, sorted with ChunkLessThan, when @saveData
+//             is true
+// @saveData - save data in memory if true, just read & discard if false
+// Returns true if the count and every chunk were read OK.
+bool BamStandardIndex::LoadChunks(ChunkVector& chunks, bool saveData) {
+
+    // read in number of chunks; bail out before using it if the read failed,
+    // since an indeterminate count would drive reserve() and the loop below
+    uint32_t numChunks = 0;
+    if ( fread(&numChunks, sizeof(numChunks), 1, m_indexStream) != 1 )
+        return false;
+    if ( m_isBigEndian ) SwapEndian_32(numChunks);
+
+    // initialize space for chunks if we're storing this data
+    if ( saveData ) chunks.reserve(numChunks);
+
+    // iterate over chunks, stopping at the first one that cannot be read
+    bool chunksOk = true;
+    for ( int i = 0; chunksOk && i < (int)numChunks; ++i )
+        chunksOk = LoadChunk(chunks, saveData);
+
+    // sort chunk vector
+    sort( chunks.begin(), chunks.end(), ChunkLessThan );
+
+    // return success/failure of load
+    return chunksOk;
+}
+
+// Loads the linear offsets of one reference (a count followed by that many
+// 64-bit values) from the index file.
+// @refEntry - reference entry whose Offsets are replaced (sorted) when
+//             @saveData is true and the load succeeded
+// @saveData - save data in memory if true, just read & discard if false
+// Returns true if the count and every offset were read OK; on failure
+// @refEntry is left untouched.
+bool BamStandardIndex::LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData) {
+
+    // read in number of linear offsets; an unreadable or negative count must
+    // not reach reserve() or the loop below
+    int32_t numLinearOffsets = 0;
+    if ( fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, m_indexStream) != 1 )
+        return false;
+    if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
+    if ( numLinearOffsets < 0 )
+        return false;
+
+    // set up destination vector (if we're saving the data)
+    LinearOffsetVector linearOffsets;
+    if ( saveData ) linearOffsets.reserve(numLinearOffsets);
+
+    // iterate over linear offsets; stop at the first failed read instead of
+    // storing a stale value
+    uint64_t linearOffset = 0;
+    for ( int i = 0; i < numLinearOffsets; ++i ) {
+        if ( fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream) != 1 )
+            return false;
+        if ( m_isBigEndian ) SwapEndian_64(linearOffset);
+        if ( saveData ) linearOffsets.push_back(linearOffset);
+    }
+
+    // sort linear offsets
+    sort ( linearOffsets.begin(), linearOffsets.end() );
+
+    // save in reference index entry if desired
+    if ( saveData ) refEntry.Offsets = linearOffsets;
+
+    return true;
+}
+
+// Loads index data for the reference with the smallest id currently known
+// to the index, reading from the current position of the index stream.
+// @saveData - save data in memory if true, just read & discard if false
+// Returns false if the index holds no references or the load failed.
+bool BamStandardIndex::LoadFirstReference(bool saveData) {
+
+    // begin() must not be dereferenced on an empty container
+    if ( m_indexData.empty() ) return false;
+
+    BamStandardIndexData::const_iterator indexBegin = m_indexData.begin();
+    return LoadReference((*indexBegin).first, saveData);
+}
+
+// Reads the bins and then the linear offsets of one reference from the
+// current position of the index stream.
+// @refId    - id under which the data is stored; an entry (with HasAlignments
+//             false) is created first if none exists yet
+// @saveData - save data in memory if true, just read & discard if false
+// Returns true if both parts loaded OK; both are always attempted.
+bool BamStandardIndex::LoadReference(const int& refId, bool saveData) {
+
+    // locate the entry for this reference, creating it on first use
+    BamStandardIndexData::iterator slot = m_indexData.find(refId);
+    if ( slot == m_indexData.end() ) {
+        ReferenceIndex blank;
+        blank.HasAlignments = false;
+        slot = m_indexData.insert( pair<int32_t, ReferenceIndex>(refId, blank) ).first;
+    }
+
+    // bins come first in the stream, linear offsets second
+    ReferenceIndex& entry = slot->second;
+    const bool binsOk    = LoadBins(entry, saveData);
+    const bool offsetsOk = LoadLinearOffsets(entry, saveData);
+    return ( binsOk && offsetsOk );
+}
+
+// Reads the number of references from the current position of the index
+// stream into @numReferences (byte-swapped on big-endian hosts).
+// Returns true if the value was read.
+bool BamStandardIndex::LoadReferenceCount(int& numReferences) {
+
+    const size_t itemsRead = fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+
+    return ( itemsRead == 1 );
+}
+
+// Coalesces consecutive chunks inside every bin of every reference (used
+// while building an index). A chunk is folded into the previously kept one
+// when the kept chunk's Stop and the next chunk's Start are equal after
+// dropping their low 16 bits; otherwise it starts a new kept chunk.
+void BamStandardIndex::MergeChunks(void) {
+
+    // walk over reference entries
+    for ( BamStandardIndexData::iterator refIt = m_indexData.begin();
+          refIt != m_indexData.end();
+          ++refIt )
+    {
+        BamBinMap& bins = refIt->second.Bins;
+
+        // walk over the bins of this reference
+        for ( BamBinMap::iterator binIt = bins.begin(); binIt != bins.end(); ++binIt ) {
+
+            ChunkVector& original = binIt->second;
+            if ( original.empty() ) continue;
+
+            // the first chunk is always kept; later ones extend it or follow it
+            ChunkVector merged;
+            merged.push_back( original.front() );
+
+            for ( size_t c = 1; c < original.size(); ++c ) {
+                const Chunk& next = original[c];
+                Chunk& tail = merged.back();
+
+                if ( (tail.Stop >> 16) == (next.Start >> 16) )
+                    tail.Stop = next.Stop;
+                else
+                    merged.push_back(next);
+            }
+
+            // replace the bin's chunk list with the merged one
+            binIt->second = merged;
+        }
+    }
+}
+
+// Appends the chunk (@saveOffset, @lastOffset) to the chunk list of bin
+// @saveBin in @binMap, creating that bin's list if it does not exist yet.
+void BamStandardIndex::SaveBinEntry(BamBinMap& binMap,
+                                    const uint32_t& saveBin,
+                                    const uint64_t& saveOffset,
+                                    const uint64_t& lastOffset)
+{
+    // operator[] yields the existing list or a fresh empty one
+    ChunkVector& binChunks = binMap[saveBin];
+    binChunks.push_back( Chunk(saveOffset, lastOffset) );
+}
+
+// Records @lastOffset in the linear offsets for the windows touched by
+// @bAlignment, where a window index is a position shifted right by
+// BAM_LIDX_SHIFT.
+//
+// @offsets is grown (zero-filled) to cover the window of the alignment's last
+// base. Every window after the one holding the alignment's start, up to and
+// including that last window, receives @lastOffset unless it already holds a
+// non-zero value.
+void BamStandardIndex::SaveLinearOffset(LinearOffsetVector& offsets,
+                                        const BamAlignment& bAlignment,
+                                        const uint64_t&     lastOffset)
+{
+    // windows of the first and last aligned base
+    const int firstWindow = bAlignment.Position >> BAM_LIDX_SHIFT;
+    const int lastWindow  = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;
+
+    // grow vector if necessary
+    const int currentSize  = offsets.size();
+    const int requiredSize = lastWindow + 1;
+    if ( requiredSize > currentSize )
+        offsets.resize(requiredSize, 0);
+
+    // fill windows that have no offset yet
+    int window = firstWindow + 1;
+    while ( window <= lastWindow ) {
+        if ( offsets[window] == 0 )
+            offsets[window] = lastOffset;
+        ++window;
+    }
+}
+
+// Ensures the index has an entry for each reference id in [0, @count) and
+// marks every one of them as having no alignments.
+void BamStandardIndex::SetReferenceCount(const int& count) {
+    int refId = 0;
+    while ( refId < count ) {
+        m_indexData[refId].HasAlignments = false;
+        ++refId;
+    }
+}
+
+// Positions the index stream at the data of the reference with the smallest
+// id currently known to the index.
+// Returns false if the index holds no references or the skip failed.
+bool BamStandardIndex::SkipToFirstReference(void) {
+
+    // begin() must not be dereferenced on an empty container
+    if ( m_indexData.empty() ) return false;
+
+    BamStandardIndexData::const_iterator indexBegin = m_indexData.begin();
+    return SkipToReference( (*indexBegin).first );
+}
+
+// Positions the index stream at the beginning of the data for reference
+// @refId by rewinding and then reading (and discarding) every reference
+// that precedes it.
+//
+// Returns false if the rewind fails, the reference count cannot be read,
+// @refId is negative or greater than the count stored in the file, or any
+// preceding reference fails to load.
+bool BamStandardIndex::SkipToReference(const int& refId) {
+
+    // attempt rewind
+    if ( !Rewind() ) return false;
+
+    // read in number of references
+    uint32_t numReferences = 0;
+    const size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+    if ( elementsRead != 1 ) return false;
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+
+    // reject targets the file cannot contain; without this bound a negative or
+    // too-large @refId keeps the loop below reading far past the stored data
+    if ( refId < 0 || (uint32_t)refId > numReferences )
+        return false;
+
+    // read & discard every reference in front of the requested one
+    bool skippedOk = true;
+    for ( int currentRefId = 0; currentRefId != refId; ++currentRefId )
+        skippedOk &= LoadReference(currentRefId, false);
+
+    // return success
+    return skippedOk;
+}
+
+// Writes the 4-byte magic number "BAI\1" to the index stream and records the
+// stream position that follows it as the beginning of index data.
+// Returns true if all 4 bytes were written.
+bool BamStandardIndex::WriteHeader(void) {
+
+    const size_t bytesWritten = fwrite("BAI\1", sizeof(char), 4, m_indexStream);
+
+    // position is recorded whether or not the write succeeded
+    m_dataBeginOffset = ftell64(m_indexStream);
+
+    return ( bytesWritten == 4 );
+}
+
+// Writes the number of references followed by the index data of every
+// reference to the index stream.
+// Returns true if the count and every reference were written OK; all
+// references are attempted even after a failure.
+bool BamStandardIndex::WriteAllReferences(void) {
+
+    // reference count, byte-swapped on big-endian hosts
+    int32_t encodedCount = m_indexData.size();
+    if ( m_isBigEndian ) SwapEndian_32(encodedCount);
+    const bool countOk = ( fwrite(&encodedCount, sizeof(encodedCount), 1, m_indexStream) == 1 );
+
+    // one record per reference, in map order
+    bool refsOk = true;
+    for ( BamStandardIndexData::const_iterator entry = m_indexData.begin();
+          entry != m_indexData.end();
+          ++entry )
+    {
+        if ( !WriteReference(entry->second) )
+            refsOk = false;
+    }
+
+    return ( countOk && refsOk );
+}
+
+// Writes one bin to the index stream: its id followed by its chunks.
+// Returns true if both parts were written OK; the chunks are written even
+// if the id write failed.
+bool BamStandardIndex::WriteBin(const uint32_t& binId, const ChunkVector& chunks) {
+
+    // bin id, byte-swapped on big-endian hosts
+    uint32_t encodedId = binId;
+    if ( m_isBigEndian ) SwapEndian_32(encodedId);
+    const bool idOk = ( fwrite(&encodedId, sizeof(encodedId), 1, m_indexStream) == 1 );
+
+    const bool chunksOk = WriteChunks(chunks);
+
+    return ( idOk && chunksOk );
+}
+
+// Writes the bin count followed by every bin in @bins to the index stream.
+// Returns true if the count and every bin were written OK; all bins are
+// attempted even after a failure.
+bool BamStandardIndex::WriteBins(const BamBinMap& bins) {
+
+    // bin count, byte-swapped on big-endian hosts
+    int32_t encodedCount = bins.size();
+    if ( m_isBigEndian ) SwapEndian_32(encodedCount);
+    const bool countOk = ( fwrite(&encodedCount, sizeof(encodedCount), 1, m_indexStream) == 1 );
+
+    // one record per bin, in map order
+    bool binsOk = true;
+    for ( BamBinMap::const_iterator bin = bins.begin(); bin != bins.end(); ++bin ) {
+        if ( !WriteBin(bin->first, bin->second) )
+            binsOk = false;
+    }
+
+    return ( countOk && binsOk );
+}
+
+// Writes one chunk (its Start and Stop values, in that order) to the index
+// stream, byte-swapping on big-endian hosts.
+// Returns true if both values were written; both writes are always attempted.
+bool BamStandardIndex::WriteChunk(const Chunk& chunk) {
+
+    // work on copies so the caller's chunk is never byte-swapped
+    uint64_t encodedStart = chunk.Start;
+    uint64_t encodedStop  = chunk.Stop;
+    if ( m_isBigEndian ) {
+        SwapEndian_64(encodedStart);
+        SwapEndian_64(encodedStop);
+    }
+
+    const bool startOk = ( fwrite(&encodedStart, sizeof(encodedStart), 1, m_indexStream) == 1 );
+    const bool stopOk  = ( fwrite(&encodedStop,  sizeof(encodedStop),  1, m_indexStream) == 1 );
+
+    return ( startOk && stopOk );
+}
+
+// Writes the chunk count followed by every chunk in @chunks to the index
+// stream.
+// Returns true if the count and every chunk were written OK; all chunks are
+// attempted even after a failure.
+bool BamStandardIndex::WriteChunks(const ChunkVector& chunks) {
+
+    // chunk count, byte-swapped on big-endian hosts
+    int32_t encodedCount = chunks.size();
+    if ( m_isBigEndian ) SwapEndian_32(encodedCount);
+    const bool countOk = ( fwrite(&encodedCount, sizeof(encodedCount), 1, m_indexStream) == 1 );
+
+    // one record per chunk, in vector order
+    bool chunksOk = true;
+    for ( ChunkVector::const_iterator c = chunks.begin(); c != chunks.end(); ++c ) {
+        if ( !WriteChunk(*c) )
+            chunksOk = false;
+    }
+
+    return ( countOk && chunksOk );
+}
+
+// Writes the number of linear offsets followed by every offset in @offsets
+// to the index stream, byte-swapping on big-endian hosts.
+// Returns true if the count and every offset were written.
+bool BamStandardIndex::WriteLinearOffsets(const LinearOffsetVector& offsets) {
+
+    size_t elementsWritten = 0;
+
+    // keep the native count for the final check and byte-swap only a copy;
+    // swapping the value that is compared below made the function report
+    // failure on big-endian hosts
+    const int32_t offsetCount = offsets.size();
+    int32_t encodedCount = offsetCount;
+    if ( m_isBigEndian ) SwapEndian_32(encodedCount);
+    elementsWritten += fwrite(&encodedCount, sizeof(encodedCount), 1, m_indexStream);
+
+    // iterate over linear offsets
+    LinearOffsetVector::const_iterator offsetIter = offsets.begin();
+    LinearOffsetVector::const_iterator offsetEnd  = offsets.end();
+    for ( ; offsetIter != offsetEnd; ++offsetIter ) {
+
+        // write linear offset
+        uint64_t linearOffset = (*offsetIter);
+        if ( m_isBigEndian ) SwapEndian_64(linearOffset);
+        elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
+    }
+
+    // success means the count plus every offset went out
+    return ( elementsWritten == (size_t)offsetCount + 1 );
+}
+
+// Writes one reference to the index stream: its bins, then its linear offsets.
+// Returns true if both parts were written OK; both are always attempted.
+bool BamStandardIndex::WriteReference(const ReferenceIndex& refEntry) {
+    const bool binsOk    = WriteBins(refEntry.Bins);
+    const bool offsetsOk = WriteLinearOffsets(refEntry.Offsets);
+    return ( binsOk && offsetsOk );
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamStandardIndex_p.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,213 @@
+// ***************************************************************************
+// BamStandardIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the standardized BAM index format (".bai")
+// ***************************************************************************
+
+#ifndef BAM_STANDARD_INDEX_FORMAT_H
+#define BAM_STANDARD_INDEX_FORMAT_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include <BamAux.h>
+#include <BamIndex.h>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class BamAlignment;
+
+namespace Internal {
+
+// BAM index constants
+// MAX_BIN is the capacity of the bins[] buffer handed to BinsFromRegion()
+const int MAX_BIN        = 37450;    // =(8^6-1)/7+1
+// NOTE(review): presumably the shift that maps a position to its linear-offset
+// slot (2^14 positions per slot) - confirm against the .bai format description
+const int BAM_LIDX_SHIFT = 14;
+
+// --------------------------------------------------
+// BamStandardIndex data structures & typedefs
+struct Chunk {
+
+    // 64-bit offsets marking where the chunk begins and ends
+    uint64_t Start;
+    uint64_t Stop;
+
+    // builds a chunk from the given offsets; both default to 0
+    Chunk(const uint64_t& startOffset = 0, const uint64_t& stopOffset = 0)
+        : Start(startOffset), Stop(stopOffset)
+    { }
+};
+
+// ordering predicate for chunks: true when @lhs starts before @rhs
+// (only the Start members are compared, Stop is ignored)
+inline
+bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) {
+    const bool startsEarlier = ( lhs.Start < rhs.Start );
+    return startsEarlier;
+}
+
+// list of chunks
+typedef std::vector<Chunk> ChunkVector;
+// chunk lists keyed by a 32-bit bin ID (see WriteBin)
+typedef std::map<uint32_t, ChunkVector> BamBinMap;
+// list of 64-bit linear offsets
+typedef std::vector<uint64_t> LinearOffsetVector;
+
+struct ReferenceIndex {
+
+    // index data kept for one reference
+    BamBinMap Bins;               // chunk lists keyed by bin ID
+    LinearOffsetVector Offsets;   // linear offsets
+    bool HasAlignments;           // flag supplied by whoever fills the entry
+
+    // builds an entry from copies of the given containers; by default the
+    // entry is empty and flagged as having no alignments
+    ReferenceIndex(const BamBinMap& initialBins             = BamBinMap(),
+                   const LinearOffsetVector& initialOffsets = LinearOffsetVector(),
+                   const bool initialHasAlignments          = false)
+        : Bins(initialBins)
+        , Offsets(initialOffsets)
+        , HasAlignments(initialHasAlignments)
+    { }
+};
+
+// complete in-memory index: one ReferenceIndex per 32-bit key
+// NOTE(review): the key is presumably the reference ID - confirm in the .cpp
+typedef std::map<int32_t, ReferenceIndex> BamStandardIndexData;
+
+// index implementation for the standardized BAM index format (".bai")
+// derives from BamIndex and keeps its data in a BamStandardIndexData map
+class BamStandardIndex : public BamIndex {
+
+    // ctor & dtor
+    public:
+	BamStandardIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
+	~BamStandardIndex(void);
+
+    // interface (implements BamIndex virtual methods)
+    public:
+	// creates index data (in-memory) from current reader data
+	bool Build(void);
+	// returns supported file extension
+	const std::string Extension(void) const { return std::string(".bai"); }
+	// returns whether reference has alignments or no
+	bool HasAlignments(const int& referenceID) const;
+	// attempts to use index to jump to region; returns success/fail
+	// a "successful" jump indicates no error, but not whether this region has data
+	//   * thus, the method sets a flag to indicate whether there are alignments
+	//     available after the jump position
+	bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+    public:
+	// clear all current index offset data in memory
+	void ClearAllData(void);
+	// return file position after header metadata
+	const off_t DataBeginOffset(void) const;
+	// return true if all index data is cached
+	bool HasFullDataCache(void) const;
+	// clears index data from all references except the first
+	void KeepOnlyFirstReferenceOffsets(void);
+	// load index data for all references, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadAllReferences(bool saveData = true);
+	// load first reference from file, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadFirstReference(bool saveData = true);
+	// load header data from index file, return true if loaded OK
+	bool LoadHeader(void);
+	// position file pointer to first reference begin, return true if skipped OK
+	bool SkipToFirstReference(void);
+	// write index reference data
+	bool WriteAllReferences(void);
+	// write index header data
+	bool WriteHeader(void);
+
+    // 'internal' methods
+    // (declared public here, although the header as a whole is an implementation detail)
+    public:
+
+	// -----------------------
+	// index file operations
+
+	// check index file magic number, return true if OK
+	bool CheckMagicNumber(void);
+	// check index file version, return true if OK
+	bool CheckVersion(void);
+	// load a single index bin entry from file, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadBin(ReferenceIndex& refEntry, bool saveData = true);
+	// NOTE(review): presumably loads every bin entry of a reference - confirm in the .cpp
+	bool LoadBins(ReferenceIndex& refEntry, bool saveData = true);
+	// NOTE(review): presumably LoadChunk reads one chunk entry into @chunks and
+	// LoadChunks reads a whole chunk list - confirm in the .cpp
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadChunk(ChunkVector& chunks, bool saveData = true);
+	bool LoadChunks(ChunkVector& chunks, bool saveData = true);
+	// load a single index linear offset entry from file, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData = true);
+	// load a single reference from file, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadReference(const int& refId, bool saveData = true);
+	// loads number of references, return true if loaded OK
+	bool LoadReferenceCount(int& numReferences);
+	// position file pointer to desired reference begin, return true if skipped OK
+	bool SkipToReference(const int& refId);
+	// write index data for bin to new index file
+	bool WriteBin(const uint32_t& binId, const ChunkVector& chunks);
+	// write index data for bins to new index file
+	bool WriteBins(const BamBinMap& bins);
+	// write index data for chunk entry to new index file
+	bool WriteChunk(const Chunk& chunk);
+	// write the chunk count, then every chunk entry, to new index file
+	bool WriteChunks(const ChunkVector& chunks);
+	// write the offset count, then every linear offset, to new index file
+	bool WriteLinearOffsets(const LinearOffsetVector& offsets);
+	// write index data (bins, then linear offsets) for a single reference to new index file
+	bool WriteReference(const ReferenceIndex& refEntry);
+
+	// -----------------------
+	// index data operations
+
+	// calculate bins that overlap region
+	// @bins - caller-provided buffer with room for MAX_BIN bin IDs
+	int BinsFromRegion(const BamRegion& region,
+			   const bool isRightBoundSpecified,
+			   uint16_t bins[MAX_BIN]);
+	// clear all index offset data for desired reference
+	void ClearReferenceOffsets(const int& refId);
+	// calculates offset(s) for a given region
+	bool GetOffsets(const BamRegion& region,
+			const bool isRightBoundSpecified,
+			std::vector<int64_t>& offsets,
+			bool* hasAlignmentsInRegion);
+	// returns true if index cache has data for desired reference
+	bool IsDataLoaded(const int& refId) const;
+	// clears index data from all references except the one specified
+	void KeepOnlyReferenceOffsets(const int& refId);
+	// simplifies index by merging 'chunks'
+	void MergeChunks(void);
+	// saves BAM bin entry for index
+	void SaveBinEntry(BamBinMap& binMap,
+			  const uint32_t& saveBin,
+			  const uint64_t& saveOffset,
+			  const uint64_t& lastOffset);
+	// saves linear offset entry for index
+	void SaveLinearOffset(LinearOffsetVector& offsets,
+			      const BamAlignment& bAlignment,
+			      const uint64_t& lastOffset);
+	// initializes index data structure to hold @count references
+	void SetReferenceCount(const int& count);
+
+    // data members
+    private:
+
+	// in-memory index data, one entry per key of BamStandardIndexData
+	BamStandardIndexData m_indexData;
+	// value reported by DataBeginOffset()
+	off_t m_dataBeginOffset;
+	// value reported by HasFullDataCache()
+	bool  m_hasFullDataCache;
+	// when true, multi-byte values are byte-swapped before being written (see WriteChunks)
+	bool  m_isBigEndian;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAM_STANDARD_INDEX_FORMAT_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamToolsIndex_p.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,577 @@
+// ***************************************************************************
+// BamToolsIndex.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 22 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#include <BamAlignment.h>
+#include <BamReader.h>
+#include <BGZF.h>
+#include <BamToolsIndex_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+#include <iostream>
+#include <map>
+using namespace std;
+
+// ctor
+// forwards @bgzf and @reader to the BamIndex base and starts with a block
+// size of 1000, no cached data and no input version read yet
+BamToolsIndex::BamToolsIndex(BgzfData* bgzf, BamReader* reader)
+    : BamIndex(bgzf, reader),
+      m_blockSize(1000),
+      m_dataBeginOffset(0),
+      m_hasFullDataCache(false),
+      m_inputVersion(0),
+      m_outputVersion(BTI_1_2)  // newest format version, the one written to new index files
+{
+    // remember the host byte order so file I/O can swap values when needed
+    m_isBigEndian = BamTools::SystemIsBigEndian();
+}
+
+// dtor
+// drops every cached offset list before the object goes away
+BamToolsIndex::~BamToolsIndex(void)
+{
+    this->ClearAllData();
+}
+
+// creates index data (in-memory) from current reader data
+//
+// walks every alignment from the start of the file and records one
+// BamToolsIndexEntry per block of up to m_blockSize alignments; a block is
+// also closed whenever the reference ID changes
+// returns false if the reader/BGZF file is unusable or a rewind fails,
+// otherwise the result of the final rewind
+bool BamToolsIndex::Build(void) {
+
+    // be sure reader & BGZF file are valid & open for reading
+    if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
+	return false;
+
+    // move file pointer to beginning of alignments
+    if ( !m_reader->Rewind() ) return false;
+
+    // initialize index data structure with space for all references
+    const int numReferences = (int)m_references.size();
+    m_indexData.clear();
+    m_hasFullDataCache = false;
+    SetReferenceCount(numReferences);
+
+    // set up counters and markers
+    //   currentBlockCount      - alignments seen in the block being built
+    //   currentAlignmentOffset - file offset of the alignment about to be read
+    //   block*                 - reference ID, largest end position, start
+    //                            offset and first position of the current block
+    int32_t currentBlockCount      = 0;
+    int64_t currentAlignmentOffset = m_BGZF->Tell();
+    int32_t blockRefId             = 0;
+    int32_t blockMaxEndPosition    = 0;
+    int64_t blockStartOffset       = currentAlignmentOffset;
+    int32_t blockStartPosition     = -1;
+
+    // plow through alignments, storing index entries
+    BamAlignment al;
+    while ( m_reader->GetNextAlignmentCore(al) ) {
+
+	// if block contains data (not the first time through) AND alignment is on a new reference
+	if ( currentBlockCount > 0 && al.RefID != blockRefId ) {
+
+	    // store previous data
+	    BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+	    SaveOffsetEntry(blockRefId, entry);
+
+	    // initialize new block for current alignment's reference
+	    // (the new block starts at the offset this alignment was read from)
+	    currentBlockCount   = 0;
+	    blockMaxEndPosition = al.GetEndPosition();
+	    blockStartOffset    = currentAlignmentOffset;
+	}
+
+	// if beginning of block, save first alignment's refID & position
+	if ( currentBlockCount == 0 ) {
+	    blockRefId         = al.RefID;
+	    blockStartPosition = al.Position;
+	}
+
+	// increment block counter
+	++currentBlockCount;
+
+	// check end position
+	int32_t alignmentEndPosition = al.GetEndPosition();
+	if ( alignmentEndPosition > blockMaxEndPosition )
+	    blockMaxEndPosition = alignmentEndPosition;
+
+	// if block is full, get offset for next block, reset currentBlockCount
+	// (blockMaxEndPosition is not reset here, so it carries over into the next block)
+	if ( currentBlockCount == m_blockSize ) {
+	    BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+	    SaveOffsetEntry(blockRefId, entry);
+	    blockStartOffset  = m_BGZF->Tell();
+	    currentBlockCount = 0;
+	}
+
+	// not the best name, but for the next iteration, this value will be the offset of the *current* alignment
+	// necessary because we won't know if this next alignment is on a new reference until we actually read it
+	currentAlignmentOffset = m_BGZF->Tell();
+    }
+
+    // store final block with data
+    // NOTE(review): this entry is saved unconditionally, i.e. also when
+    // currentBlockCount is 0 (no alignments read, or the last block was just
+    // flushed above) - confirm that the extra entry is intended
+    BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
+    SaveOffsetEntry(blockRefId, entry);
+
+    // set flag
+    m_hasFullDataCache = true;
+
+    // return success/failure of rewind
+    return m_reader->Rewind();
+}
+
+// check index file magic number, return true if OK
+//
+// consumes the next 4 bytes of the index stream and compares them with the
+// BTI signature; a short read fails silently, a mismatch is reported on stderr
+bool BamToolsIndex::CheckMagicNumber(void) {
+
+    char signature[4];
+    if ( fread(signature, 1, 4, m_indexStream) != 4 )
+        return false;
+
+    const bool signatureMatches = ( strncmp(signature, "BTI\1", 4) == 0 );
+    if ( !signatureMatches )
+        fprintf(stderr, "Problem with index file - invalid format.\n");
+
+    return signatureMatches;
+}
+
+// check index file version, return true if OK
+//
+// reads the version number into m_inputVersion and rejects, with a message
+// on stderr, versions that are non-positive, newer than the version this
+// code writes, or known to be deprecated (BTI_1_0, BTI_1_1)
+bool BamToolsIndex::CheckVersion(void) {
+
+    // pull the stored version number, converting it to host byte order
+    if ( fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream) != 1 )
+        return false;
+    if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
+
+    // zero and negative versions are never valid
+    if ( m_inputVersion <= 0 ) {
+        fprintf(stderr, "Problem with index file - invalid version.\n");
+        return false;
+    }
+
+    // the file was produced by a newer format than this code can write
+    if ( m_inputVersion > m_outputVersion ) {
+        fprintf(stderr, "Problem with index file - attempting to use an outdated version of BamTools with a newer index file.\n");
+        fprintf(stderr, "Please update BamTools to a more recent version to support this index file.\n");
+        return false;
+    }
+
+    // deprecated, unsupported versions
+    // (formats that could not accommodate a particular bug fix)
+    const Version fileVersion = (Version)m_inputVersion;
+
+    if ( fileVersion == BTI_1_0 ) {
+        fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n");
+        fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
+        return false;
+    }
+
+    if ( fileVersion == BTI_1_1 ) {
+        fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to handling empty references.\n");
+        fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
+        return false;
+    }
+
+    // anything else is a supported version
+    return true;
+}
+
+// clear all current index offset data in memory
+//
+// the per-reference entries stay in the map; only their offset lists are
+// emptied, one ClearReferenceOffsets() call per reference
+void BamToolsIndex::ClearAllData(void) {
+    for ( BamToolsIndexData::const_iterator refIter = m_indexData.begin();
+          refIter != m_indexData.end();
+          ++refIter )
+    {
+        ClearReferenceOffsets( refIter->first );
+    }
+}
+
+// clear all index offset data for desired reference
+//
+// unknown reference IDs are ignored; for a known one the offset list is
+// emptied and the index is no longer considered fully cached
+void BamToolsIndex::ClearReferenceOffsets(const int& refId) {
+    BamToolsIndexData::iterator refIter = m_indexData.find(refId);
+    if ( refIter != m_indexData.end() ) {
+        refIter->second.Offsets.clear();
+        m_hasFullDataCache = false;
+    }
+}
+
+// return file position after header metadata
+// (the value stored by LoadHeader() / WriteHeader(), 0 until one of them has run)
+const off_t BamToolsIndex::DataBeginOffset(void) const
+{
+    return this->m_dataBeginOffset;
+}
+
+// calculate BAM file offset for desired region
+// return true if no error (*NOT* equivalent to "has alignments or valid offset")
+//   check @hasAlignmentsInRegion to determine this status
+// @region - target region (only LeftRefID and LeftPosition are consulted)
+// @offset - resulting seek target
+// @hasAlignmentsInRegion - set to true when some index entry reaches the left bound
+// N.B. - ignores isRightBoundSpecified
+bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
+
+    // a reference that the index does not know cannot be resolved
+    if ( m_indexData.find(region.LeftRefID) == m_indexData.end() )
+        return false;
+
+    // pull this reference's entries from file when they are not cached yet
+    // (both steps are attempted before the combined result is checked)
+    if ( !IsDataLoaded(region.LeftRefID) ) {
+        const bool skippedOk = SkipToReference(region.LeftRefID);
+        const bool loadedOk  = LoadReference(region.LeftRefID);
+        if ( !(skippedOk && loadedOk) ) return false;
+    }
+
+    // look the reference up again & make sure it really has entries
+    const BamToolsIndexData::const_iterator refIter = m_indexData.find(region.LeftRefID);
+    if ( refIter == m_indexData.end() ) return false;
+    const vector<BamToolsIndexEntry>& entries = refIter->second.Offsets;
+    if ( entries.empty() ) return false;
+
+    // -------------------------------------------------------
+    // pick the entry to jump to
+
+    // start from the first entry, then keep advancing the seek target while
+    // entries end before the left bound; stop at the first one that reaches it
+    offset = entries.front().StartOffset;
+    size_t entryIndex = 0;
+    while ( entryIndex < entries.size() ) {
+        const BamToolsIndexEntry& candidate = entries[entryIndex];
+        if ( candidate.MaxEndPosition >= region.LeftPosition ) break;
+        offset = candidate.StartOffset;
+        ++entryIndex;
+    }
+
+    // the region has data only if the scan stopped before running off the end
+    *hasAlignmentsInRegion = ( entryIndex != entries.size() );
+
+    // with caching disabled, discard what was loaded for this lookup
+    if ( m_cacheMode == BamIndex::NoIndexCaching )
+        ClearReferenceOffsets(region.LeftRefID);
+
+    return true;
+}
+
+// returns whether reference has alignments or no
+// (false as well when @refId is not present in the index data)
+bool BamToolsIndex::HasAlignments(const int& refId) const {
+    const BamToolsIndexData::const_iterator refIter = m_indexData.find(refId);
+    return ( refIter != m_indexData.end() ) && refIter->second.HasAlignments;
+}
+
+// return true if all index data is cached
+// (flag maintained by Build(), LoadAllReferences() and ClearReferenceOffsets())
+bool BamToolsIndex::HasFullDataCache(void) const
+{
+    return this->m_hasFullDataCache;
+}
+
+// returns true if index cache has data for desired reference
+//
+// an unknown reference is never "loaded"; a known reference without
+// alignments counts as loaded because there is nothing to fetch for it
+bool BamToolsIndex::IsDataLoaded(const int& refId) const {
+
+    const BamToolsIndexData::const_iterator refIter = m_indexData.find(refId);
+    if ( refIter == m_indexData.end() )
+        return false;
+
+    const BamToolsReferenceEntry& cached = refIter->second;
+    return !cached.HasAlignments || !cached.Offsets.empty();
+}
+
+// attempts to use index to jump to region; returns success/fail
+//
+// @region - target region; its left bound decides where to seek
+// @hasAlignmentsInRegion - cleared first, then set by GetOffset()
+bool BamToolsIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
+
+    // assume "no data" until the offset lookup says otherwise
+    *hasAlignmentsInRegion = false;
+
+    // the reader and BGZF handle must exist and the reader must be open
+    const bool readerUsable = ( m_reader != 0 ) && ( m_BGZF != 0 ) && m_reader->IsOpen();
+    if ( !readerUsable ) {
+        fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
+        return false;
+    }
+
+    // a left bound beyond the end of its reference cannot be jumped to
+    // (at() is range-checked, so an invalid LeftRefID raises an exception here)
+    if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength )
+        return false;
+
+    // ask the index for the nearest seek target
+    int64_t seekTarget;
+    const bool offsetFound = GetOffset(region, seekTarget, hasAlignmentsInRegion);
+    if ( !offsetFound ) {
+        fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n");
+        return false;
+    }
+
+    // the jump succeeds exactly when the seek does
+    return m_BGZF->Seek(seekTarget);
+}
+
+// clears index data from all references except the first
+//
+// "first" is the smallest reference ID present in the index data; when the
+// index data is empty there is nothing to keep or clear
+void BamToolsIndex::KeepOnlyFirstReferenceOffsets(void) {
+
+    // dereferencing begin() of an empty map is undefined behavior
+    if ( m_indexData.empty() ) return;
+
+    BamToolsIndexData::const_iterator indexBegin = m_indexData.begin();
+    KeepOnlyReferenceOffsets( (*indexBegin).first );
+}
+
+// clears index data from all references except the one specified
+// (the offsets of every other reference are emptied via ClearReferenceOffsets())
+void BamToolsIndex::KeepOnlyReferenceOffsets(const int& refId) {
+    for ( BamToolsIndexData::iterator refIter = m_indexData.begin();
+          refIter != m_indexData.end();
+          ++refIter )
+    {
+        const int candidateId = refIter->first;
+        if ( candidateId == refId ) continue;   // this is the one to keep
+        ClearReferenceOffsets(candidateId);
+    }
+}
+
+// load index data for all references, return true if loaded OK
+// @saveData - save data in memory if true, just read & discard if false
+//
+// does nothing when the full index is already cached; otherwise reads the
+// reference count and then every reference in order (0 .. count-1)
+bool BamToolsIndex::LoadAllReferences(bool saveData) {
+
+    // nothing to do when everything is cached already
+    if ( m_hasFullDataCache ) return true;
+
+    // the reference count precedes the per-reference data
+    // (it is not used to pre-size m_indexData here)
+    int32_t referenceCount;
+    if ( !LoadReferenceCount(referenceCount) ) return false;
+
+    // load each reference; a failure is remembered but does not stop the loop
+    bool everyReferenceLoaded = true;
+    int refId = 0;
+    while ( refId < referenceCount ) {
+        if ( !LoadReference(refId, saveData) )
+            everyReferenceLoaded = false;
+        ++refId;
+    }
+
+    // the cache only counts as complete when the data was actually kept
+    if ( everyReferenceLoaded && saveData )
+        m_hasFullDataCache = true;
+
+    return everyReferenceLoaded;
+}
+
+// load header data from index file, return true if loaded OK
+//
+// the header consists of the magic number, the format version and the block
+// size; the stream position right after it is kept in m_dataBeginOffset
+bool BamToolsIndex::LoadHeader(void) {
+
+    // magic number first, then version (the version is not read if the magic number is bad)
+    if ( !CheckMagicNumber() || !CheckVersion() )
+        return false;
+
+    // block size, converted to host byte order
+    if ( fread(&m_blockSize, sizeof(m_blockSize), 1, m_indexStream) != 1 )
+        return false;
+    if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
+
+    // everything after this position is per-reference data
+    m_dataBeginOffset = ftell64(m_indexStream);
+
+    return true;
+}
+
+// load a single index entry from file, return true if loaded OK
+// @refId    - reference the entry is stored under when it is kept
+// @saveData - save data in memory if true, just read & discard if false
+bool BamToolsIndex::LoadIndexEntry(const int& refId, bool saveData) {
+
+    // an entry is three fields in a fixed order; all three reads are
+    // attempted before the total is checked
+    BamToolsIndexEntry loaded;
+    const size_t gotMaxEnd = fread(&loaded.MaxEndPosition, sizeof(loaded.MaxEndPosition), 1, m_indexStream);
+    const size_t gotOffset = fread(&loaded.StartOffset,    sizeof(loaded.StartOffset),    1, m_indexStream);
+    const size_t gotStart  = fread(&loaded.StartPosition,  sizeof(loaded.StartPosition),  1, m_indexStream);
+
+    const size_t fieldsRead = gotMaxEnd + gotOffset + gotStart;
+    if ( fieldsRead != 3 ) {
+        cerr << "Error reading index entry. Expected 3 elements, read in: " << fieldsRead << endl;
+        return false;
+    }
+
+    // convert to host byte order if necessary
+    if ( m_isBigEndian ) {
+        SwapEndian_32(loaded.MaxEndPosition);
+        SwapEndian_64(loaded.StartOffset);
+        SwapEndian_32(loaded.StartPosition);
+    }
+
+    // keep the entry only when asked to
+    if ( saveData )
+        SaveOffsetEntry(refId, loaded);
+
+    return true;
+}
+
+// load first reference from file, return true if loaded OK
+// @saveData - save data in memory if true, just read & discard if false
+//
+// "first" is the smallest reference ID present in the index data; when the
+// index data is still empty, reference 0 is used
+bool BamToolsIndex::LoadFirstReference(bool saveData) {
+
+    // dereferencing begin() of an empty map is undefined behavior, so fall
+    // back to 0 - the first ID SetReferenceCount() would create
+    const int firstRefId = m_indexData.empty() ? 0 : (*m_indexData.begin()).first;
+    return LoadReference(firstRefId, saveData);
+}
+
+// load a single reference from file, return true if loaded OK
+// @refId    - reference whose entries start at the current stream position
+// @saveData - save data in memory if true, just read & discard if false
+//
+// returns false when the offset count or any of the offset entries cannot
+// be read (e.g. the index file is truncated)
+bool BamToolsIndex::LoadReference(const int& refId, bool saveData) {
+
+    // read in number of offsets for this reference
+    uint32_t numOffsets;
+    size_t elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, m_indexStream);
+    if ( elementsRead != 1 ) return false;
+    if ( m_isBigEndian ) SwapEndian_32(numOffsets);
+
+    // initialize offsets container for this reference
+    // (done even when @saveData is false: it also records whether the
+    //  reference has any alignments)
+    SetOffsetCount(refId, (int)numOffsets);
+
+    // iterate over offset entries, giving up at the first unreadable one
+    // instead of reporting a partially read reference as loaded
+    for ( unsigned int j = 0; j < numOffsets; ++j ) {
+        if ( !LoadIndexEntry(refId, saveData) )
+            return false;
+    }
+
+    // every entry was read
+    return true;
+}
+
+// loads number of references, return true if loaded OK
+// @numReferences - receives the count read from the current stream position
+//                  (converted to host byte order)
+bool BamToolsIndex::LoadReferenceCount(int& numReferences) {
+
+    const size_t itemsRead = fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+
+    // the swap is applied whether or not the read succeeded
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+
+    return ( itemsRead == 1 );
+}
+
+// saves an index offset entry in memory
+//
+// the reference entry is created on demand, flagged as having alignments,
+// and @entry is appended to its offset list
+void BamToolsIndex::SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry) {
+    BamToolsReferenceEntry& target = m_indexData[refId];
+    target.HasAlignments = true;
+    target.Offsets.push_back(entry);
+}
+
+// pre-allocates size for offset vector
+//
+// the reference entry is created on demand; besides reserving room for
+// @offsetCount entries, the reference is flagged as having alignments
+// exactly when @offsetCount is positive
+void BamToolsIndex::SetOffsetCount(const int& refId, const int& offsetCount) {
+    BamToolsReferenceEntry& target = m_indexData[refId];
+    target.Offsets.reserve(offsetCount);
+    const bool anyOffsets = ( offsetCount > 0 );
+    target.HasAlignments = anyOffsets;
+}
+
+// initializes index data structure to hold @count references
+//
+// entries 0 .. @count-1 are created if missing and flagged as having no
+// alignments; existing offset lists are left alone
+void BamToolsIndex::SetReferenceCount(const int& count) {
+    int refId = 0;
+    while ( refId < count ) {
+        m_indexData[refId].HasAlignments = false;
+        ++refId;
+    }
+}
+
+// position file pointer to first reference begin, return true if skipped OK
+//
+// "first" is the smallest reference ID present in the index data; when the
+// index data is still empty, reference 0 is used
+bool BamToolsIndex::SkipToFirstReference(void) {
+
+    // dereferencing begin() of an empty map is undefined behavior, so fall
+    // back to 0 - the first ID SetReferenceCount() would create
+    const int firstRefId = m_indexData.empty() ? 0 : (*m_indexData.begin()).first;
+    return SkipToReference(firstRefId);
+}
+
+// position file pointer to desired reference begin, return true if skipped OK
+//
+// @refId - reference to stop in front of; must lie in [0, number of
+//          references stored in the index file)
+// rewinds, reads the reference count and then reads & discards the data of
+// every reference that precedes @refId
+bool BamToolsIndex::SkipToReference(const int& refId) {
+
+    // attempt rewind
+    if ( !Rewind() ) return false;
+
+    // read in number of references
+    int32_t numReferences;
+    size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
+    if ( elementsRead != 1 ) return false;
+    if ( m_isBigEndian ) SwapEndian_32(numReferences);
+
+    // a reference the file does not contain cannot be skipped to; without
+    // this check a negative @refId would never terminate the loop below and
+    // a too-large one would read past the stored references
+    if ( refId < 0 || refId >= numReferences ) return false;
+
+    // read & discard the preceding references, stopping at the first failure
+    for ( int currentRefId = 0; currentRefId < refId; ++currentRefId ) {
+        if ( !LoadReference(currentRefId, false) )
+            return false;
+    }
+
+    // file pointer now sits at the beginning of @refId's data
+    return true;
+}
+
+// write header to new index file
+//
+// layout: 4-byte magic number, 32-bit format version, 32-bit block size;
+// the stream position right after the header is kept in m_dataBeginOffset
+bool BamToolsIndex::WriteHeader(void) {
+
+    // BTI 'magic number' (counted byte by byte, hence 4 elements)
+    const size_t magicBytes = fwrite("BTI\1", 1, 4, m_indexStream);
+
+    // format version, swapped on big-endian hosts
+    int32_t versionOut = (int32_t)m_outputVersion;
+    if ( m_isBigEndian ) SwapEndian_32(versionOut);
+    const size_t versionItems = fwrite(&versionOut, sizeof(versionOut), 1, m_indexStream);
+
+    // block size, swapped on big-endian hosts (the member itself stays untouched)
+    int32_t blockSizeOut = m_blockSize;
+    if ( m_isBigEndian ) SwapEndian_32(blockSizeOut);
+    const size_t blockSizeItems = fwrite(&blockSizeOut, sizeof(blockSizeOut), 1, m_indexStream);
+
+    // everything after this position is per-reference data
+    m_dataBeginOffset = ftell64(m_indexStream);
+
+    // 4 magic bytes + 1 version + 1 block size
+    return ( magicBytes + versionItems + blockSizeItems ) == 6;
+}
+
+// write index data for all references to new index file
+//
+// the number of entries in the index data goes out first, followed by each
+// reference entry in map order
+bool BamToolsIndex::WriteAllReferences(void) {
+
+    // reference count, swapped on big-endian hosts
+    int32_t referenceCount = (int32_t)m_indexData.size();
+    if ( m_isBigEndian ) SwapEndian_32(referenceCount);
+    const bool countWritten = ( fwrite(&referenceCount, sizeof(referenceCount), 1, m_indexStream) == 1 );
+
+    // every reference is attempted, even after an earlier one failed
+    bool allReferencesWritten = true;
+    for ( BamToolsIndexData::const_iterator refIter = m_indexData.begin();
+          refIter != m_indexData.end();
+          ++refIter )
+    {
+        if ( !WriteReferenceEntry(refIter->second) )
+            allReferencesWritten = false;
+    }
+
+    return countWritten && allReferencesWritten;
+}
+
+// write current reference index data to new index file
+//
+// the number of offset entries goes out first, followed by each entry in
+// container order
+bool BamToolsIndex::WriteReferenceEntry(const BamToolsReferenceEntry& refEntry) {
+
+    // entry count, swapped on big-endian hosts
+    uint32_t entryCount = refEntry.Offsets.size();
+    if ( m_isBigEndian ) SwapEndian_32(entryCount);
+    const bool countWritten = ( fwrite(&entryCount, sizeof(entryCount), 1, m_indexStream) == 1 );
+
+    // every entry is attempted, even after an earlier one failed
+    bool allEntriesWritten = true;
+    for ( size_t i = 0; i < refEntry.Offsets.size(); ++i ) {
+        if ( !WriteIndexEntry(refEntry.Offsets[i]) )
+            allEntriesWritten = false;
+    }
+
+    return countWritten && allEntriesWritten;
+}
+
+// write current index offset entry to new index file
+//
+// fields are written in the order MaxEndPosition, StartOffset,
+// StartPosition; @entry itself is never modified
+bool BamToolsIndex::WriteIndexEntry(const BamToolsIndexEntry& entry) {
+
+    // work on copies so the byte swap does not touch the in-memory entry
+    int32_t maxEndOut   = entry.MaxEndPosition;
+    int64_t offsetOut   = entry.StartOffset;
+    int32_t startPosOut = entry.StartPosition;
+
+    if ( m_isBigEndian ) {
+        SwapEndian_32(maxEndOut);
+        SwapEndian_64(offsetOut);
+        SwapEndian_32(startPosOut);
+    }
+
+    // all three writes are attempted before the total is checked
+    const size_t wroteMaxEnd   = fwrite(&maxEndOut,   sizeof(maxEndOut),   1, m_indexStream);
+    const size_t wroteOffset   = fwrite(&offsetOut,   sizeof(offsetOut),   1, m_indexStream);
+    const size_t wroteStartPos = fwrite(&startPosOut, sizeof(startPosOut), 1, m_indexStream);
+
+    return ( wroteMaxEnd + wroteOffset + wroteStartPos ) == 3;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamToolsIndex_p.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,192 @@
+// ***************************************************************************
+// BamToolsIndex.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides index operations for the BamTools index format (".bti")
+// ***************************************************************************
+
+#ifndef BAMTOOLS_INDEX_FORMAT_H
+#define BAMTOOLS_INDEX_FORMAT_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include <BamAux.h>
+#include <BamIndex.h>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+namespace Internal {
+
+// individual index offset entry
+struct BamToolsIndexEntry {
+
+    // one block of alignments, as stored in the index
+    int32_t MaxEndPosition;   // largest alignment end position recorded for the block
+    int64_t StartOffset;      // file offset at which the block starts
+    int32_t StartPosition;    // position of the block's first alignment
+
+    // ctor - every field defaults to 0
+    BamToolsIndexEntry(const int32_t& maxEnd    = 0,
+                       const int64_t& offset    = 0,
+                       const int32_t& firstPos  = 0)
+        : MaxEndPosition(maxEnd),
+          StartOffset(offset),
+          StartPosition(firstPos)
+    { }
+};
+
+// reference index entry
+struct BamToolsReferenceEntry {
+
+    // index data kept for one reference
+    bool HasAlignments;                        // false until offsets are recorded or announced
+    std::vector<BamToolsIndexEntry> Offsets;   // block entries stored for the reference
+
+    // ctor - starts out empty and flagged as having no alignments
+    BamToolsReferenceEntry(void) : HasAlignments(false) { }
+};
+
+// the actual index data structure
+typedef std::map<int, BamToolsReferenceEntry> BamToolsIndexData;
+
+// BamIndex implementation for the BamTools index format (file extension ".bti").
+// Only the declaration lives here; note that every method section below is
+// declared 'public', including the ones labelled 'internal'.
+class BamToolsIndex : public BamIndex {
+
+    // keep a list of any supported versions here
+    // (might be useful later to handle any 'legacy' versions if the format changes)
+    // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
+    //
+    // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by:
+    //
+    // if ( indexVersion >= BTI_1_2 )
+    //   do something new
+    // else
+    //   do the old thing
+    //
+    // (this enum sits before the first access specifier, so it is private to the class)
+    enum Version { BTI_1_0 = 1
+		 , BTI_1_1
+		 , BTI_1_2
+		 };
+
+
+    // ctor & dtor
+    public:
+	BamToolsIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
+	~BamToolsIndex(void);
+
+    // interface (implements BamIndex virtual methods)
+    public:
+	// creates index data (in-memory) from current reader data
+	bool Build(void);
+	// returns supported file extension
+	const std::string Extension(void) const { return std::string(".bti"); }
+	// returns whether the reference has alignments or not
+	bool HasAlignments(const int& referenceID) const;
+	// attempts to use index to jump to region; returns success/fail
+	// a "successful" jump indicates no error, but not whether this region has data
+	//   * thus, the method sets a flag to indicate whether there are alignments
+	//     available after the jump position
+	bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
+    public:
+	// clear all current index offset data in memory
+	void ClearAllData(void);
+	// return file position after header metadata
+	const off_t DataBeginOffset(void) const;
+	// return true if all index data is cached
+	bool HasFullDataCache(void) const;
+	// clears index data from all references except the first
+	void KeepOnlyFirstReferenceOffsets(void);
+	// load index data for all references, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadAllReferences(bool saveData = true);
+	// load first reference from file, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadFirstReference(bool saveData = true);
+	// load header data from index file, return true if loaded OK
+	bool LoadHeader(void);
+	// position file pointer to first reference begin, return true if skipped OK
+	bool SkipToFirstReference(void);
+	// write index reference data
+	bool WriteAllReferences(void);
+	// write index header data
+	bool WriteHeader(void);
+
+    // 'internal' methods
+    public:
+
+	// -----------------------
+	// index file operations
+
+	// check index file magic number, return true if OK
+	bool CheckMagicNumber(void);
+	// check index file version, return true if OK
+	bool CheckVersion(void);
+	// return true if FILE* is open
+	bool IsOpen(void) const;
+	// load a single index entry from file, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadIndexEntry(const int& refId, bool saveData = true);
+	// load a single reference from file, return true if loaded OK
+	// @saveData - save data in memory if true, just read & discard if false
+	bool LoadReference(const int& refId, bool saveData = true);
+	// loads number of references, return true if loaded OK
+	// @numReferences - output parameter (taken by non-const reference)
+	bool LoadReferenceCount(int& numReferences);
+	// position file pointer to desired reference begin, return true if skipped OK
+	bool SkipToReference(const int& refId);
+	// write current reference index data to new index file
+	bool WriteReferenceEntry(const BamToolsReferenceEntry& refEntry);
+	// write current index offset entry to new index file
+	bool WriteIndexEntry(const BamToolsIndexEntry& entry);
+
+	// -----------------------
+	// index data operations
+
+	// clear all index offset data for desired reference
+	void ClearReferenceOffsets(const int& refId);
+	// calculate BAM file offset for desired region
+	// return true if no error (*NOT* equivalent to "has alignments or valid offset")
+	//   check @hasAlignmentsInRegion to determine this status
+	// @region - target region
+	// @offset - resulting seek target
+	// @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status
+	bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
+	// returns true if index cache has data for desired reference
+	bool IsDataLoaded(const int& refId) const;
+	// clears index data from all references except the one specified
+	void KeepOnlyReferenceOffsets(const int& refId);
+	// saves an index offset entry in memory
+	void SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry);
+	// pre-allocates size for offset vector
+	void SetOffsetCount(const int& refId, const int& offsetCount);
+	// initializes index data structure to hold @count references
+	void SetReferenceCount(const int& count);
+
+    // data members
+    private:
+	int32_t           m_blockSize;
+	BamToolsIndexData m_indexData;        // per-reference offset data (see BamToolsIndexData)
+	off_t             m_dataBeginOffset;  // value type returned by DataBeginOffset()
+	bool              m_hasFullDataCache;
+	bool              m_isBigEndian;
+	int32_t           m_inputVersion; // Version is serialized as int
+	Version           m_outputVersion;
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMTOOLS_INDEX_FORMAT_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamWriter.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,47 @@
+// ***************************************************************************
+// BamWriter.cpp (c) 2009 Michael Strömberg, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 22 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#include <BamWriter.h>
+#include <BamWriter_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+
+#include <iostream>
+using namespace std;
+
+// constructor: allocates the private implementation object that every
+// public method forwards to
+BamWriter::BamWriter(void)
+    : d(new BamWriterPrivate)
+{ }
+
+// destructor: releases the private implementation allocated by the constructor
+BamWriter::~BamWriter(void) {
+    delete d;
+    d = 0;   // leave no dangling pointer behind
+}
+
+// closes the alignment archive
+// (pure forwarder to BamWriterPrivate::Close())
+void BamWriter::Close(void) {
+    d->Close();
+}
+
+// opens the alignment archive
+// @filename            - path of the BAM file to write
+// @samHeader           - SAM header text
+// @referenceSequences  - reference sequence entries
+// @isWriteUncompressed - passed through unchanged to the implementation
+// returns whatever BamWriterPrivate::Open() returns (pure forwarder)
+bool BamWriter::Open(const string& filename,
+                     const string& samHeader,
+                     const RefVector& referenceSequences,
+                     bool isWriteUncompressed)
+{
+    return d->Open(filename, samHeader, referenceSequences, isWriteUncompressed);
+}
+
+// saves the alignment to the alignment archive
+// (pure forwarder to BamWriterPrivate::SaveAlignment())
+void BamWriter::SaveAlignment(const BamAlignment& al) {
+    d->SaveAlignment(al);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamWriter.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,50 @@
+// ***************************************************************************
+// BamWriter.h (c) 2009 Michael Strömberg, Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_H
+#define BAMWRITER_H
+
+#include <api_global.h>
+#include <BamAlignment.h>
+#include <string>
+
+namespace BamTools {
+
+namespace Internal {
+    class BamWriterPrivate;
+} // namespace Internal
+
+// Public BAM writer. All work is delegated to a privately held
+// Internal::BamWriterPrivate object ('d'), which is only forward-declared
+// in this header.
+//
+// NOTE(review): the class holds a raw pointer and declares no copy
+// constructor or assignment operator, so the compiler-generated ones copy the
+// pointer. If the destructor deletes 'd' (as the accompanying .cpp does),
+// copying a BamWriter would lead to a double delete -- confirm that no caller
+// copies instances.
+class API_EXPORT BamWriter {
+
+    // constructor/destructor
+    public:
+        BamWriter(void);
+        ~BamWriter(void);
+
+    // public interface
+    public:
+        // closes the alignment archive
+        void Close(void);
+        // opens the alignment archive
+        // @writeUncompressed - defaults to false
+        bool Open(const std::string& filename, 
+                  const std::string& samHeader, 
+                  const BamTools::RefVector& referenceSequences, 
+                  bool writeUncompressed = false);
+        // saves the alignment to the alignment archive
+        void SaveAlignment(const BamTools::BamAlignment& al);
+
+    // private implementation
+    private:
+        Internal::BamWriterPrivate* d;
+};
+
+} // namespace BamTools
+
+#endif // BAMWRITER_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamWriter_p.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,379 @@
+// ***************************************************************************
+// BamWriter_p.cpp (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 22 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#include <BamAlignment.h>
+#include <BamWriter_p.h>
+using namespace BamTools;
+using namespace BamTools::Internal;
+using namespace std;
+
+// constructor: records the host byte order once, so the write routines can
+// decide whether multi-byte values need swapping
+BamWriterPrivate::BamWriterPrivate(void)
+    : IsBigEndian(SystemIsBigEndian())
+{ }
+
+// destructor: closes the BGZF stream, so a writer that goes out of scope
+// without an explicit Close() call still gets closed
+BamWriterPrivate::~BamWriterPrivate(void) {
+    mBGZF.Close();
+}
+
+// closes the alignment archive
+// (forwards to the BGZF stream; the destructor issues the same call)
+void BamWriterPrivate::Close(void) {
+    mBGZF.Close();
+}
+
+// calculates minimum bin for a BAM alignment interval
+//
+// Tries the shift widths 14, 17, 20, 23, 26 in that order and returns
+// (level offset + begin >> shift) for the first width at which 'begin' and
+// 'end' - 1 land in the same slot; returns 0 when none of them match.
+const unsigned int BamWriterPrivate::CalculateMinimumBin(const int begin, int end) const {
+
+    // shift width of each level and the bin number added at that level
+    static const int levelShift[5]  = {   14,  17, 20, 23, 26 };
+    static const int levelOffset[5] = { 4681, 585, 73,  9,  1 };
+
+    // the comparison uses the position just before 'end'
+    const int last = end - 1;
+
+    for ( int level = 0; level < 5; ++level ) {
+        const int shift = levelShift[level];
+        if ( (begin >> shift) == (last >> shift) )
+            return levelOffset[level] + (begin >> shift);
+    }
+
+    // no level matched
+    return 0;
+}
+
+// creates a packed cigar string from the supplied cigar operations
+//
+// @cigarOperations - operations to pack
+// @packedCigar     - output; resized to one BT_SIZEOF_INT-byte slot per
+//                    operation, each slot holding (Length << BAM_CIGAR_SHIFT | op code)
+//
+// An operation type outside {M, I, D, N, S, H, P} prints an error to stderr
+// and terminates the process via exit(1).
+void BamWriterPrivate::CreatePackedCigar(const vector<CigarOp>& cigarOperations, string& packedCigar) {
+
+    // size the output buffer
+    const unsigned int opCount = cigarOperations.size();
+    packedCigar.resize(opCount * BT_SIZEOF_INT);
+
+    // write cursor over the string's storage
+    unsigned int* slot = (unsigned int*)packedCigar.data();
+
+    for ( unsigned int idx = 0; idx < opCount; ++idx, ++slot ) {
+
+        const CigarOp& op = cigarOperations[idx];
+
+        // translate the operation character into its numeric code
+        unsigned int opCode;
+        switch ( op.Type ) {
+            case 'M': opCode = BAM_CMATCH;     break;
+            case 'I': opCode = BAM_CINS;       break;
+            case 'D': opCode = BAM_CDEL;       break;
+            case 'N': opCode = BAM_CREF_SKIP;  break;
+            case 'S': opCode = BAM_CSOFT_CLIP; break;
+            case 'H': opCode = BAM_CHARD_CLIP; break;
+            case 'P': opCode = BAM_CPAD;       break;
+            default:
+                fprintf(stderr, "ERROR: Unknown cigar operation found: %c\n", op.Type);
+                exit(1);
+        }
+
+        // length in the high bits, operation code in the low bits
+        *slot = op.Length << BAM_CIGAR_SHIFT | opCode;
+    }
+}
+
+// encodes the supplied query sequence into 4-bit notation
+//
+// @query        - bases to encode; only '=', 'A', 'C', 'G', 'T', 'N' are accepted
+// @encodedQuery - output; holds two bases per byte (first base in the high
+//                 nibble), rounded up to a whole byte for odd lengths
+//
+// Any other character prints an error to stderr and terminates the process
+// via exit(1).
+//
+// The loop runs over query.size() rather than stopping at a NUL byte read
+// through data(): std::string::data() is only guaranteed to be NUL-terminated
+// from C++11 on.
+void BamWriterPrivate::EncodeQuerySequence(const string& query, string& encodedQuery) {
+
+    // prepare the encoded query string (integer round-up, no floating point);
+    // every byte starts out as zero so an unused low nibble stays clear
+    const unsigned int queryLen = query.size();
+    const unsigned int encodedQueryLen = (queryLen / 2) + (queryLen % 2);
+    encodedQuery.assign(encodedQueryLen, '\0');
+
+    for ( unsigned int i = 0; i < queryLen; ++i ) {
+
+        const char base = query[i];
+
+        unsigned char nucleotideCode;
+        switch ( base ) {
+            case '=': nucleotideCode = 0;  break;
+            case 'A': nucleotideCode = 1;  break;
+            case 'C': nucleotideCode = 2;  break;
+            case 'G': nucleotideCode = 4;  break;
+            case 'T': nucleotideCode = 8;  break;
+            case 'N': nucleotideCode = 15; break;
+            default:
+                fprintf(stderr, "ERROR: Only the following bases are supported in the BAM format: {=, A, C, G, T, N}. Found [%c]\n", base);
+                exit(1);
+        }
+
+        // pack the nucleotide code: even positions fill the high nibble,
+        // odd positions the low nibble of the same byte
+        if ( (i % 2) == 0 )
+            encodedQuery[i / 2] = (char)(nucleotideCode << 4);
+        else
+            encodedQuery[i / 2] |= (char)nucleotideCode;
+    }
+}
+
+// opens the alignment archive
+//
+// @filename            - path of the BAM file to create
+// @samHeader           - SAM header text (may be empty)
+// @referenceSequences  - reference entries written to the sequence dictionary
+// @isWriteUncompressed - passed through to the BGZF stream
+//
+// Returns false if the BGZF file cannot be opened, true once the header has
+// been handed to the stream (the results of the individual Write() calls are
+// not checked).
+//
+// Each length is kept in host byte order for use as a byte count; only a
+// separate copy is byte-swapped for writing. Previously the swapped value was
+// reused as the byte count, so on big-endian hosts the header text and the
+// reference names were written with a wrong length.
+bool BamWriterPrivate::Open(const string& filename,
+                            const string& samHeader,
+                            const RefVector& referenceSequences,
+                            bool isWriteUncompressed)
+{
+    // open the BGZF file for writing, return failure if error
+    if ( !mBGZF.Open(filename, "wb", isWriteUncompressed) )
+        return false;
+
+    // ================
+    // write the header
+    // ================
+
+    // write the BAM signature
+    const unsigned char SIGNATURE_LENGTH = 4;
+    const char* BAM_SIGNATURE = "BAM\1";
+    mBGZF.Write(BAM_SIGNATURE, SIGNATURE_LENGTH);
+
+    // write the SAM header text length
+    const uint32_t samHeaderLen = samHeader.size();   // host order, used as byte count
+    uint32_t samHeaderLenOut = samHeaderLen;          // copy that goes to the file
+    if (IsBigEndian) SwapEndian_32(samHeaderLenOut);
+    mBGZF.Write((char*)&samHeaderLenOut, BT_SIZEOF_INT);
+
+    // write the SAM header text
+    if ( samHeaderLen > 0 )
+        mBGZF.Write(samHeader.data(), samHeaderLen);
+
+    // write the number of reference sequences
+    uint32_t numReferenceSequences = referenceSequences.size();
+    if (IsBigEndian) SwapEndian_32(numReferenceSequences);
+    mBGZF.Write((char*)&numReferenceSequences, BT_SIZEOF_INT);
+
+    // =============================
+    // write the sequence dictionary
+    // =============================
+
+    RefVector::const_iterator rsIter;
+    for ( rsIter = referenceSequences.begin(); rsIter != referenceSequences.end(); ++rsIter ) {
+
+        // write the reference sequence name length (includes the terminating NUL)
+        const uint32_t referenceSequenceNameLen = rsIter->RefName.size() + 1;
+        uint32_t referenceSequenceNameLenOut = referenceSequenceNameLen;
+        if (IsBigEndian) SwapEndian_32(referenceSequenceNameLenOut);
+        mBGZF.Write((char*)&referenceSequenceNameLenOut, BT_SIZEOF_INT);
+
+        // write the reference sequence name, using the host-order length
+        mBGZF.Write(rsIter->RefName.c_str(), referenceSequenceNameLen);
+
+        // write the reference sequence length
+        int32_t referenceLength = rsIter->RefLength;
+        if (IsBigEndian) SwapEndian_32(referenceLength);
+        mBGZF.Write((char*)&referenceLength, BT_SIZEOF_INT);
+    }
+
+    // return success
+    return true;
+}
+
+// saves the alignment to the alignment archive
+//
+// Two paths:
+//   * al.SupportData.HasCoreOnly set: the core fields plus the raw character
+//     buffer (SupportData.AllCharData) are written as they are;
+//   * otherwise: name, cigar, bases, qualities and tag data are encoded from
+//     the alignment's standard fields, and the bin is always recomputed.
+//
+// 'al' is never modified. The results of the Write() calls are not checked.
+// An invalid cigar operation, base or tag value type terminates the process
+// via exit(1).
+void BamWriterPrivate::SaveAlignment(const BamAlignment& al) {
+
+    // if BamAlignment contains only the core data and a raw char data buffer
+    // (as a result of BamReader::GetNextAlignmentCore())
+    if ( al.SupportData.HasCoreOnly ) {
+
+        // write the block size
+        unsigned int blockSize = al.SupportData.BlockLength;
+        if (IsBigEndian) SwapEndian_32(blockSize);
+        mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);
+
+        // assign the BAM core data
+        uint32_t buffer[8];
+        buffer[0] = al.RefID;
+        buffer[1] = al.Position;
+        buffer[2] = (al.Bin << 16) | (al.MapQuality << 8) | al.SupportData.QueryNameLength;
+        buffer[3] = (al.AlignmentFlag << 16) | al.SupportData.NumCigarOperations;
+        buffer[4] = al.SupportData.QuerySequenceLength;
+        buffer[5] = al.MateRefID;
+        buffer[6] = al.MatePosition;
+        buffer[7] = al.InsertSize;
+
+        // swap BAM core endian-ness, if necessary
+        if ( IsBigEndian ) {
+            for ( int i = 0; i < 8; ++i )
+                SwapEndian_32(buffer[i]);
+        }
+
+        // write the BAM core
+        mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);
+
+        // write the raw char data
+        mBGZF.Write((char*)al.SupportData.AllCharData.data(), al.SupportData.BlockLength-BAM_CORE_SIZE);
+    }
+
+    // otherwise, BamAlignment should contain character data in the standard fields: Name, QueryBases, etc
+    // ( resulting from BamReader::GetNextAlignment() *OR* being generated directly by client code )
+    else {
+
+        // calculate char lengths
+        const unsigned int nameLength         = al.Name.size() + 1;
+        const unsigned int numCigarOperations = al.CigarData.size();
+        const unsigned int queryLength        = al.QueryBases.size();
+        const unsigned int tagDataLength      = al.TagData.size();
+
+        // no way to tell if BamAlignment.Bin is already defined (no default, invalid value)
+        // force calculation of Bin before storing
+        const int endPosition = al.GetEndPosition();
+        const unsigned int alignmentBin = CalculateMinimumBin(al.Position, endPosition);
+
+        // create our packed cigar string
+        string packedCigar;
+        CreatePackedCigar(al.CigarData, packedCigar);
+        const unsigned int packedCigarLength = packedCigar.size();
+
+        // encode the query
+        string encodedQuery;
+        EncodeQuerySequence(al.QueryBases, encodedQuery);
+        const unsigned int encodedQueryLength = encodedQuery.size();
+
+        // write the block size
+        const unsigned int dataBlockSize = nameLength + packedCigarLength + encodedQueryLength + queryLength + tagDataLength;
+        unsigned int blockSize = BAM_CORE_SIZE + dataBlockSize;
+        if (IsBigEndian) SwapEndian_32(blockSize);
+        mBGZF.Write((char*)&blockSize, BT_SIZEOF_INT);
+
+        // assign the BAM core data
+        uint32_t buffer[8];
+        buffer[0] = al.RefID;
+        buffer[1] = al.Position;
+        buffer[2] = (alignmentBin << 16) | (al.MapQuality << 8) | nameLength;
+        buffer[3] = (al.AlignmentFlag << 16) | numCigarOperations;
+        buffer[4] = queryLength;
+        buffer[5] = al.MateRefID;
+        buffer[6] = al.MatePosition;
+        buffer[7] = al.InsertSize;
+
+        // swap BAM core endian-ness, if necessary
+        if ( IsBigEndian ) {
+            for ( int i = 0; i < 8; ++i )
+                SwapEndian_32(buffer[i]);
+        }
+
+        // write the BAM core
+        mBGZF.Write((char*)&buffer, BAM_CORE_SIZE);
+
+        // write the query name
+        mBGZF.Write(al.Name.c_str(), nameLength);
+
+        // write the packed cigar
+        if ( IsBigEndian ) {
+
+            char* cigarData = (char*)calloc(sizeof(char), packedCigarLength);
+            memcpy(cigarData, packedCigar.data(), packedCigarLength);
+
+            // swap one whole packed operation at a time; stepping byte by byte
+            // would re-swap overlapping words and run past the end of the buffer
+            for ( unsigned int i = 0; i + BT_SIZEOF_INT <= packedCigarLength; i += BT_SIZEOF_INT )
+                SwapEndian_32p(&cigarData[i]);
+
+            mBGZF.Write(cigarData, packedCigarLength);
+            free(cigarData);
+        }
+        else
+            mBGZF.Write(packedCigar.data(), packedCigarLength);
+
+        // write the encoded query sequence
+        mBGZF.Write(encodedQuery.data(), encodedQueryLength);
+
+        // write the base qualities
+        // The offset of 33 is removed in a private buffer, so the caller's
+        // (const) alignment keeps its qualities and can be saved again.
+        // The buffer always holds exactly queryLength bytes -- the count that
+        // went into the block size above; positions without a quality value
+        // are 0xFF, the BAM marker for an absent quality.
+        const unsigned int numQualities = ( al.Qualities.size() < queryLength
+                                            ? (unsigned int)al.Qualities.size()
+                                            : queryLength );
+        string baseQualities(queryLength, (char)0xFF);
+        for ( unsigned int i = 0; i < numQualities; ++i )
+            baseQualities[i] = al.Qualities[i] - 33;
+        mBGZF.Write(baseQualities.data(), queryLength);
+
+        // write the tag data
+        if ( IsBigEndian ) {
+
+            // numeric tag values are swapped in a scratch copy
+            char* tagData = (char*)calloc(sizeof(char), tagDataLength);
+            memcpy(tagData, al.TagData.data(), tagDataLength);
+
+            int i = 0;
+            while ( (unsigned int)i < tagDataLength ) {
+
+                i += 2;                                 // skip tag type (e.g. "RG", "NM", etc)
+                uint8_t type = toupper(tagData[i]);     // lower & upper case letters have same meaning
+                ++i;                                    // skip value type
+
+                switch (type) {
+
+                    case('A') :
+                    case('C') :
+                        ++i;
+                        break;
+
+                    case('S') :
+                        SwapEndian_16p(&tagData[i]);
+                        i+=2; // sizeof(uint16_t)
+                        break;
+
+                    case('F') :
+                    case('I') :
+                        SwapEndian_32p(&tagData[i]);
+                        i+=4; // sizeof(uint32_t)
+                        break;
+
+                    case('D') :
+                        SwapEndian_64p(&tagData[i]);
+                        i+=8; // sizeof(uint64_t)
+                        break;
+
+                    case('H') :
+                    case('Z') :
+                        while (tagData[i]) { ++i; }
+                        ++i; // increment one more for null terminator
+                        break;
+
+                    default :
+                        fprintf(stderr, "ERROR: Invalid tag value type\n"); // shouldn't get here
+                        free(tagData);
+                        exit(1);
+                }
+            }
+
+            mBGZF.Write(tagData, tagDataLength);
+            free(tagData);
+        }
+        else
+            mBGZF.Write(al.TagData.data(), tagDataLength);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamWriter_p.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,63 @@
+// ***************************************************************************
+// BamWriter_p.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic functionality for producing BAM files
+// ***************************************************************************
+
+#ifndef BAMWRITER_P_H
+#define BAMWRITER_P_H
+
+//  -------------
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the BamTools API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+
+#include <BamAux.h>
+#include <BGZF.h>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+namespace Internal {
+
+// Implementation class behind BamTools::BamWriter.
+// Everything, including the helper methods and the data members, is declared
+// public; per the warning at the top of this header the class is not part of
+// the BamTools API.
+class BamWriterPrivate {
+
+    // ctor & dtor
+    public:
+	BamWriterPrivate(void);
+	~BamWriterPrivate(void);
+
+    // "public" interface to BamWriter
+    public:
+	// closes the alignment archive
+	void Close(void);
+	// opens the alignment archive; note that, unlike BamWriter::Open(),
+	// @isWriteUncompressed has no default value here
+	bool Open(const std::string& filename,
+		  const std::string& samHeader,
+		  const BamTools::RefVector& referenceSequences,
+		  bool isWriteUncompressed);
+	// saves the alignment to the alignment archive
+	void SaveAlignment(const BamAlignment& al);
+
+    // internal methods
+    public:
+	// calculates minimum bin for a BAM alignment interval
+	const unsigned int CalculateMinimumBin(const int begin, int end) const;
+	// packs @cigarOperations into @packedCigar (output parameter)
+	void CreatePackedCigar(const std::vector<BamTools::CigarOp>& cigarOperations, std::string& packedCigar);
+	// encodes @query into 4-bit notation in @encodedQuery (output parameter)
+	void EncodeQuerySequence(const std::string& query, std::string& encodedQuery);
+
+    // data members
+    public:
+	BgzfData mBGZF;      // output stream all data is written through
+	bool IsBigEndian;    // host byte-order flag consulted before multi-byte writes
+};
+
+} // namespace Internal
+} // namespace BamTools
+
+#endif // BAMWRITER_P_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/Makevars.in	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,4 @@
+# Template for src/Makevars; the placeholders between at-signs are filled in
+# when the template is processed (presumably by the package's configure
+# script -- confirm in configure.ac).
+# Link flags: the substituted library list plus zlib.
+PKG_LIBS=@LIBS@ -lz
+# C and C++ compile flags: current directory on the include path, the
+# _FASTMAP and MAQ_LONGREADS defines, and the substituted HAVE_LIBBZ2 value.
+PKG_CFLAGS=-I./ -D_FASTMAP -DMAQ_LONGREADS @HAVE_LIBBZ2@ 
+PKG_CXXFLAGS=-I./ -D_FASTMAP -DMAQ_LONGREADS	@HAVE_LIBBZ2@ 
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/api_global.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,22 @@
+// ***************************************************************************
+// api_global.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides macros for exporting & importing BamTools API library symbols
+// ***************************************************************************
+
+#ifndef API_GLOBAL_H
+#define API_GLOBAL_H
+
+#include "bamtools_global.h"
+
+// API_EXPORT expands to the export decoration when BAMTOOLS_API_LIBRARY is
+// defined, and to the import decoration otherwise; both decorations come
+// from bamtools_global.h.
+#ifdef BAMTOOLS_API_LIBRARY
+#  define API_EXPORT BAMTOOLS_LIBRARY_EXPORT
+#else
+#  define API_EXPORT BAMTOOLS_LIBRARY_IMPORT
+#endif
+
+#endif // API_GLOBAL_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/bamread.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,222 @@
+#include "pc.h"
+#include "config.h"
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <strstream>
+#include <algorithm>
+#include <string>
+#include <functional>
+#include <utility>
+#include <ext/hash_map>
+#include <boost/tokenizer.hpp>
+
+#include "BamAlignment.h"
+#include "BamAux.h"         /* RefVector/RefData */
+#include "BamReader.h"
+
+
+extern "C" {
+#include "R.h"
+#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx; 
+
+
+// comparison functor: orders ints by magnitude, ignoring the sign
+class lessAbsoluteValue {
+public:
+  // true when |lhs| is strictly smaller than |rhs|
+  bool operator()(int lhs, int rhs) const {
+    const int lhsMagnitude = abs(lhs);
+    const int rhsMagnitude = abs(rhs);
+    return lhsMagnitude < rhsMagnitude;
+  }
+};
+
+
+
+
+
+//#define DEBUG 1
+
+extern "C" {
+
+
+  // read in bam file
+  //
+  // filename          - R character value holding the path of the BAM file
+  // read_tag_names_R  - R integer; non-zero requests the read names as well
+  //
+  // Only mapped, primary alignments are kept. Returns a list named by
+  // reference sequence (in order of first appearance); each element is a list
+  // with
+  //   "t" - signed positions: al.Position, plus al.Length for reverse-strand
+  //         reads, negated for reverse-strand reads
+  //   "n" - edit distance reported by GetEditDistance(), 0 when unavailable
+  //   "s" - read names (only when requested)
+  // If the file cannot be opened an error line is printed and an empty list
+  // is returned.
+  SEXP read_bam(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG  
+    Rprintf("start\n");
+#endif
+  const char* fname=CHAR(asChar(filename));
+  // normalise the flag to 0/1: it is also used as a column count below
+  const int read_names=(*(INTEGER(read_tag_names_R)) != 0) ? 1 : 0;
+#ifdef DEBUG  
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // main data vector
+  // chr - pos
+  vector< vector<int> > pos;
+  vector< vector<int> > posnm; // number of mismatches
+  vector< vector<string> > tagnames;
+
+  // chromosome map
+  typedef hash_map<string, int, hash<string>,equal_to<string> > chr_index_map;
+  chr_index_map cind_map;
+  vector<string> cnames;
+
+  BamTools::BamReader bamf;
+  
+  if (!bamf.Open(fname)) {
+    cout << "ERROR: failed to open BAM file '" << fname << "'" << endl;
+  } else {
+    
+    Rprintf("opened %s\n",fname);
+    BamTools::RefVector refs = bamf.GetReferenceData();
+    BamTools::BamAlignment al;
+    
+    int fcount=0;
+    int badref=0; // alignments whose reference id has no entry in 'refs'
+    while (bamf.GetNextAlignment(al)) {
+      if (!al.IsMapped() || !al.IsPrimaryAlignment()) {
+        continue;
+      }
+
+      // never index 'refs' with an id it does not contain
+      if (al.RefID < 0 || (size_t) al.RefID >= refs.size()) {
+        badref++;
+        continue;
+      }
+      
+      const string& chr=refs[al.RefID].RefName;
+      int fpos=(int) (al.Position + (al.IsReverseStrand() ? al.Length : 0));
+      if(al.IsReverseStrand()) { fpos=-1*fpos; }
+      
+      uint32_t nms;
+      int nm=0;
+      if (al.GetEditDistance(nms)) { 
+        nm=nms;
+      }
+      
+      // determine the chromosome index
+      chr_index_map::const_iterator li=cind_map.find(chr);
+      int cind=-1;
+      if(li==cind_map.end()) {
+        // register new chromosome
+        cind=cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        // allocate new pos vector
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG  
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int) pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      if(read_names) {
+        (tagnames[cind]).push_back(al.Name);
+      }
+#ifdef DEBUG  
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d",chr.c_str(),cind,fpos,nm);
+      if(fcount>30) {
+        break;
+      }
+#endif
+      
+    }
+    bamf.Close();
+    
+    if(badref>0) {
+      Rprintf("warning: skipped %d alignments with an out-of-range reference id\n",badref);
+    }
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+  
+
+  // construct output structures
+  // Only 'chnames' and 'ans' stay protected until the end. The objects built
+  // for one chromosome are released as soon as they are attached to 'ans', so
+  // the protection stack does not grow with the number of reference sequences.
+  int np=0; // number of long-lived protections
+  SEXP chnames;
+  PROTECT(chnames = allocVector(STRSXP, cnames.size())); np++;
+  for(size_t ci=0;ci<cnames.size();++ci) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+
+  SEXP ans;
+  PROTECT(ans = allocVector(VECSXP, cnames.size()));   np++;
+  for(size_t ci=0;ci<pos.size();++ci) {
+    const vector<int>& cpos=pos[ci];
+    const vector<int>& cnm=posnm[ci];
+    int lp=0; // protections made for this chromosome
+
+    SEXP dnames_R;
+    PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); lp++;
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+    }
+
+    SEXP tv,nv,sv=R_NilValue;
+    PROTECT(tv=allocVector(INTSXP,cpos.size()));   lp++;
+    PROTECT(nv=allocVector(INTSXP,cpos.size()));   lp++;
+    if(read_names) {
+      PROTECT(sv=allocVector(STRSXP,cpos.size()));   lp++;
+    }
+
+    // positions and mismatch counts are parallel vectors of equal length
+    copy(cpos.begin(), cpos.end(), INTEGER(tv));
+    copy(cnm.begin(), cnm.end(), INTEGER(nv));
+
+    if(read_names) {
+      const vector<string>& cnamesv=tagnames[ci];
+      for(size_t si=0;si<cnamesv.size();++si) {
+        SET_STRING_ELT(sv,si,mkChar(cnamesv[si].c_str()));
+      }
+    }
+
+    SEXP dv;
+    PROTECT(dv = allocVector(VECSXP, 2+read_names));   lp++;
+    SET_VECTOR_ELT(dv, 0, tv);
+    SET_VECTOR_ELT(dv, 1, nv);
+    if(read_names) {
+      SET_VECTOR_ELT(dv, 2, sv);
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+    
+    SET_VECTOR_ELT(ans, ci, dv);
+
+    // 'dv' (and through it tv, nv, sv and the names) is now reachable from
+    // the protected 'ans', so the per-chromosome protections can go
+    UNPROTECT(lp);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+  
+#ifdef DEBUG  
+  Rprintf("unprotecting %d elements\n",np);
+#endif
+  
+  UNPROTECT(np);
+  return(ans);
+}
+
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/bamtools_global.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,32 @@
+// ***************************************************************************
+// bamtools_global.h (c) 2010 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic definitions for exporting & importing library symbols
+// ***************************************************************************
+
+#ifndef BAMTOOLS_GLOBAL_H
+#define BAMTOOLS_GLOBAL_H
+
+// BAMTOOLS_LIBRARY_EXPORT
+// decoration for symbols the library exports; a definition supplied before
+// this header is included (e.g. on the compiler command line) takes precedence
+#ifndef BAMTOOLS_LIBRARY_EXPORT
+#  if defined(WIN32)
+#    define BAMTOOLS_LIBRARY_EXPORT __declspec(dllexport)
+#  else
+#    define BAMTOOLS_LIBRARY_EXPORT __attribute__((visibility("default")))
+#  endif
+#endif // BAMTOOLS_LIBRARY_EXPORT
+
+// BAMTOOLS_LIBRARY_IMPORT
+// decoration for symbols used from the library; expands to nothing unless
+// WIN32 is defined
+#ifndef BAMTOOLS_LIBRARY_IMPORT
+#  if defined(WIN32)
+#    define BAMTOOLS_LIBRARY_IMPORT __declspec(dllimport)
+#  else
+#    define BAMTOOLS_LIBRARY_IMPORT
+#  endif
+#endif // BAMTOOLS_LIBRARY_IMPORT
+
+#endif // BAMTOOLS_GLOBAL_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/bed2vector.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,2628 @@
+#include "pc.h"
+#include "config.h"
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <strstream>
+#include <algorithm>
+#include <string>
+#include <functional>
+#include <utility>
+#include <ext/hash_map>
+#include <boost/tokenizer.hpp>
+
+#ifdef HAVE_LIBBZ2
+#include <bzlib.h>
+#endif 
+
+extern "C" {
+#include "R.h"
+#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx; 
+
+
+// Comparison functor ordering integers by magnitude, ignoring sign.
+// Used as the predicate for sorting signed tag positions.
+class lessAbsoluteValue {
+public:
+  // Returns true when |a| is strictly smaller than |b|.
+  bool operator()(int a, int b) const {
+    const int mag_a = abs(a);
+    const int mag_b = abs(b);
+    return mag_a < mag_b;
+  }
+};
+
+
+
+#ifdef HAVE_LIBBZ2
+// Appends bytes read from the bzip2 stream `b` to `line`, one byte at a time,
+// until a newline is consumed or BZ2_bzRead reports a status other than BZ_OK.
+// The newline itself is not appended. Returns the last bzip2 status: BZ_OK
+// when a newline terminated the read, otherwise the non-OK status.
+// A byte delivered together with a non-OK status is not appended.
+int get_bzline(BZFILE* b,string& line) {
+  int status=BZ_OK;
+  for(;;) {
+    char ch;
+    BZ2_bzRead(&status, b, &ch, 1);
+    if(status!=BZ_OK) {
+      return status;
+    }
+    if(ch=='\n') {
+      return status;
+    }
+    line.push_back(ch);
+  }
+}
+
+// Reads the next line into `line` (trailing newline removed) from either a
+// bzip2 stream (`bz2file` non-zero, reading from `b`) or a plain stdio stream
+// (`bz2file` zero, reading from `f`).
+// Returns 1 when a line was read and 0 at end of input or on error.
+// In the bzip2 branch any status other than BZ_OK ends the input; text read
+// before that status stays in `line`, but the call still returns 0.
+int get_a_line(FILE *f,BZFILE *b,int bz2file,string& line) {
+  line="";
+  if(bz2file) {
+    int bzerror=get_bzline(b,line);
+    if(bzerror==BZ_OK) {
+      return(1);
+    } else {
+      if(bzerror!=BZ_STREAM_END) {
+	cerr<<"encountered BZERROR="<<bzerror<<endl;
+      }
+      return(0);
+    }
+  } else {
+    char *cline=NULL;
+    size_t n=0;
+    ssize_t nread=getline(&cline,&n,f);
+    if(nread == -1) {
+      // getline may have allocated a buffer even though it failed
+      free(cline);
+      return(0);
+    }
+    // strip the newline only when one is present, so that a final line
+    // without a terminator keeps its last character
+    if(nread>0 && cline[nread-1]=='\n') {
+      cline[nread-1]='\0';
+    }
+    line+=cline;
+    free(cline);
+    return(1);
+  }
+}
+#endif
+
+
+/**
+ * Read .bed data into a list of per-chromosome vectors holding 5' positions, with the sign
+ * encoding the strand.
+ */
+
+//#define DEBUG 1
+
+extern "C" {
+// .Call entry point: reads a space/tab-delimited BED file.
+//
+// filename - character vector; its first element is the path to read.
+//
+// Fields used per line: 0 = chromosome, 1 = start, 2 = end, 5 = strand
+// (optional). The stored position is `start`, or `-end` when the strand field
+// is "-". Lines with fewer than three fields are skipped.
+//
+// Returns a list named by chromosome (in order of first appearance); each
+// element is an integer vector of signed positions sorted by absolute value.
+// If the file cannot be opened a message is printed and an empty list is
+// returned.
+SEXP read_bed_ends(SEXP filename) {
+
+  const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG  
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // main data vector
+  // chr - pos
+  vector< vector<int> > pos;
+
+  // chromosome map: name -> index into pos/cnames
+  hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep(" \t");
+
+  int fcount=0;
+  ifstream bed_file(fname);
+  if(!bed_file) {
+    // do not claim the file was opened when it was not
+    Rprintf("can't open input file \"%s\"\n",fname);
+  } else {
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    while(getline(bed_file,line)) {
+#ifdef DEBUG  
+      Rprintf("line: %s\n",line.c_str());
+#endif
+      // materialise the tokens so that short lines can be detected instead
+      // of advancing the tokenizer iterator past its end
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<3) { continue; }
+
+      const string& chr=fields[0];
+      int fstart=atoi(fields[1].c_str());
+      int fend=atoi(fields[2].c_str());
+      int fpos=fstart;
+      // the strand is the sixth column; only consult it when present
+      if(fields.size()>=6 && fields[5]=="-") {
+        fpos=-1*fend;
+      }
+
+      // determine the chromosome index, registering unseen chromosomes
+      int cind=-1;
+      hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+      if(li==cind_map.end()) {
+        cind=cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        pos.push_back(vector<int>());
+#ifdef DEBUG  
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+#ifdef DEBUG  
+      Rprintf("read in position chr=%s cind=%d fpos=%d\n",chr.c_str(),cind,fpos);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    bed_file.close();
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  SEXP chnames;
+  PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+  for(size_t ci=0;ci<cnames.size();++ci) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+
+  // sort each chromosome by absolute position
+  for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
+    sort(csi->begin(), csi->end(), lessAbsoluteValue());
+  }
+
+  SEXP ans;
+  PROTECT(ans = allocVector(VECSXP, cnames.size()));
+  for(size_t ci=0;ci<pos.size();++ci) {
+    SEXP nv;
+    PROTECT(nv=allocVector(INTSXP,pos[ci].size()));
+    copy(pos[ci].begin(),pos[ci].end(),INTEGER(nv));
+    SET_VECTOR_ELT(ans, ci, nv);
+    // nv is reachable through ans now; release it so the protection stack
+    // does not grow with the number of chromosomes
+    UNPROTECT(1);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2); // chnames, ans
+  return(ans);
+}
+
+
+
+// .Call entry point: older reader for space/tab-delimited "meland" files
+// (stream-based, no tag names).
+//
+// filename - character vector; its first element is the path to read.
+//
+// Fields used per line: 2 = match code (only codes starting with 'U' are
+// kept; the digits after 'U' give the mismatch count), 6 = tag length,
+// 7 = chromosome (its first three characters are dropped), 8 = position.
+// Lines with fewer than nine fields are skipped.
+//
+// Returns a list named by chromosome; each element is a list with integer
+// vectors "t" (positions), "n" (mismatches) and "l" (lengths), in file order.
+// If the file cannot be opened a message is printed and an empty list is
+// returned.
+SEXP read_meland_old(SEXP filename) {
+
+  const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG  
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // main data vectors, parallel per chromosome
+  vector< vector<int> > pos;
+  vector< vector<int> > posnm; // number of mismatches
+  vector< vector<int> > poslen; // length
+
+  // chromosome map: name -> index into the vectors above
+  hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep(" \t");
+
+  int fcount=0;
+  ifstream bed_file(fname);
+  if(!bed_file) {
+    Rprintf("can't open input file \"%s\"\n",fname);
+  } else {
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    while(getline(bed_file,line)) {
+#ifdef DEBUG  
+      Rprintf("line: %s\n",line.c_str());
+#endif
+      // materialise the tokens so that short lines are skipped rather than
+      // read past the end of the tokenizer
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<9) { continue; }
+
+      const string& str_nm=fields[2];
+      if(str_nm[0]!='U') { continue; }
+      int nm=atoi(str_nm.c_str()+1);
+      int len=atoi(fields[6].c_str());
+      string chr=fields[7];
+      // drop the three-character prefix; shorter names are kept whole
+      // because substr(3) would throw on them
+      if(chr.size()>=3) { chr=chr.substr(3); }
+      int fpos=atoi(fields[8].c_str());
+
+      // determine the chromosome index, registering unseen chromosomes
+      int cind=-1;
+      hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+      if(li==cind_map.end()) {
+        cind=cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        poslen.push_back(vector<int>());
+#ifdef DEBUG  
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      (poslen[cind]).push_back(len);
+#ifdef DEBUG  
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    bed_file.close();
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  SEXP chnames;
+  PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+  for(size_t ci=0;ci<cnames.size();++ci) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+
+  SEXP ans;
+  PROTECT(ans = allocVector(VECSXP, cnames.size()));
+  for(size_t ci=0;ci<pos.size();++ci) {
+    const size_t ntags=pos[ci].size();
+
+    SEXP dv,dnames_R,tv,nv,lv;
+    PROTECT(dnames_R = allocVector(STRSXP, 3));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+
+    PROTECT(tv=allocVector(INTSXP,ntags));
+    PROTECT(nv=allocVector(INTSXP,ntags));
+    PROTECT(lv=allocVector(INTSXP,ntags));
+    copy(pos[ci].begin(),pos[ci].end(),INTEGER(tv));
+    copy(posnm[ci].begin(),posnm[ci].end(),INTEGER(nv));
+    copy(poslen[ci].begin(),poslen[ci].end(),INTEGER(lv));
+
+    PROTECT(dv = allocVector(VECSXP, 3));
+    SET_VECTOR_ELT(dv, 0, tv);
+    SET_VECTOR_ELT(dv, 1, nv);
+    SET_VECTOR_ELT(dv, 2, lv);
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+
+    SET_VECTOR_ELT(ans, ci, dv);
+    // everything allocated in this iteration is reachable through ans now;
+    // release it so the protection stack stays bounded
+    UNPROTECT(5);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2); // chnames, ans
+  return(ans);
+}
+
+
+  // Reads one complete line from `f` into `line`, keeping the trailing
+  // newline when the input has one. Lines longer than the internal buffer
+  // are assembled from several fgets() chunks instead of being returned as
+  // separate pieces. Returns 1 if any text was read, 0 at end of file, on a
+  // read error, or when `f` is NULL.
+  int get_a_line(FILE *f,string& line) {
+    line="";
+    if(!f) {
+      return(0);
+    }
+    char chunk[1024];
+    int got=0;
+    while(fgets(chunk,sizeof(chunk),f)) {
+      got=1;
+      line+=chunk;
+      // stop once the chunk completed the line
+      if(!line.empty() && line[line.size()-1]=='\n') {
+        break;
+      }
+    }
+    return(got);
+  }
+
+
+  // .Call entry point: reads a space/tab-delimited "meland" file.
+  //
+  // filename         - character vector; its first element is the path.
+  // read_tag_names_R - integer flag; when non-zero the tag names (field 0)
+  //                    are returned as an extra "s" component.
+  //
+  // Fields used per line: 0 = tag name, 2 = match code (only codes starting
+  // with 'U' are kept; the digits after 'U' give the mismatch count),
+  // 6 = tag length, 7 = chromosome (first three characters dropped),
+  // 8 = position. Lines with fewer than nine fields are skipped.
+  //
+  // Returns a list named by chromosome; each element is a list with integer
+  // vectors "t", "n", "l" and, optionally, a character vector "s".
+  // If the file cannot be opened a message is printed and an empty list is
+  // returned.
+  SEXP read_meland(SEXP filename,SEXP read_tag_names_R) {
+
+  const char* fname=CHAR(asChar(filename));
+  int read_names=*(INTEGER(read_tag_names_R));
+#ifdef DEBUG  
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // main data vectors, parallel per chromosome
+  vector< vector<int> > pos;
+  vector< vector<int> > posnm; // number of mismatches
+  vector< vector<int> > poslen; // length
+  vector< vector<string> > tagnames;
+
+  // chromosome map: name -> index into the vectors above
+  hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep(" \t");
+
+  int fcount=0;
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { cout<<"can't open input file \""<<fname<<"\"\n"; }
+  else {
+    // only read from / close the stream when it was actually opened
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    while(get_a_line(f,line)) {
+#ifdef DEBUG  
+      Rprintf("line: %s\n",line.c_str());
+#endif
+      // materialise the tokens so that short lines are skipped rather than
+      // read past the end of the tokenizer
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<9) { continue; }
+
+      const string& tagname=fields[0];
+      const string& str_nm=fields[2];
+      if(str_nm[0]!='U') { continue; }
+      int nm=atoi(str_nm.c_str()+1);
+      int len=atoi(fields[6].c_str());
+      string chr=fields[7];
+      // drop the three-character prefix; shorter names are kept whole
+      // because substr(3) would throw on them
+      if(chr.size()>=3) { chr=chr.substr(3); }
+      int fpos=atoi(fields[8].c_str());
+
+      // determine the chromosome index, registering unseen chromosomes
+      int cind=-1;
+      hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+      if(li==cind_map.end()) {
+        cind=cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        poslen.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG  
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      (poslen[cind]).push_back(len);
+      if(read_names) {
+        (tagnames[cind]).push_back(tagname);
+      }
+#ifdef DEBUG  
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  SEXP chnames;
+  PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+  for(size_t ci=0;ci<cnames.size();++ci) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+
+  SEXP ans;
+  PROTECT(ans = allocVector(VECSXP, cnames.size()));
+  for(size_t ci=0;ci<pos.size();++ci) {
+    const size_t ntags=pos[ci].size();
+    int nprot=0; // protections taken in this iteration
+
+    SEXP dv,dnames_R;
+    PROTECT(dnames_R = allocVector(STRSXP, 3+read_names)); nprot++;
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 3, mkChar("s"));
+    }
+
+    SEXP tv,nv,lv;
+    SEXP sv=R_NilValue;
+    PROTECT(tv=allocVector(INTSXP,ntags));   nprot++;
+    PROTECT(nv=allocVector(INTSXP,ntags));   nprot++;
+    PROTECT(lv=allocVector(INTSXP,ntags));   nprot++;
+    copy(pos[ci].begin(),pos[ci].end(),INTEGER(tv));
+    copy(posnm[ci].begin(),posnm[ci].end(),INTEGER(nv));
+    copy(poslen[ci].begin(),poslen[ci].end(),INTEGER(lv));
+
+    if(read_names) {
+      PROTECT(sv=allocVector(STRSXP,ntags));   nprot++;
+      for(size_t i=0;i<tagnames[ci].size();++i) {
+        SET_STRING_ELT(sv,i,mkChar(tagnames[ci][i].c_str()));
+      }
+    }
+
+    PROTECT(dv = allocVector(VECSXP, 3+read_names));   nprot++;
+    SET_VECTOR_ELT(dv, 0, tv);
+    SET_VECTOR_ELT(dv, 1, nv);
+    SET_VECTOR_ELT(dv, 2, lv);
+    if(read_names) {
+      SET_VECTOR_ELT(dv, 3, sv);
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+
+    SET_VECTOR_ELT(ans, ci, dv);
+    // everything allocated in this iteration is reachable through ans now;
+    // release it so the protection stack stays bounded
+    UNPROTECT(nprot);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2); // chnames, ans
+  return(ans);
+}
+
+
+
+// reads regular eland files, recording mismatch positions
+//
+// filename - character vector; its first element is the path to read.
+//
+// Lines are split on tabs (empty fields kept). Fields used: 1 = sequence,
+// 2 = match code (only codes starting with 'U' are kept), 6 = reference name
+// (the text between "chr" and the first '.' is taken as the chromosome),
+// 7 = position, 8 = strand ("R" flips the position to the negated far end of
+// the read), 10 and 11 = optional mismatch descriptors whose last character
+// is dropped before conversion to a number. Lines with fewer than nine
+// fields are skipped.
+//
+// Returns a list named by chromosome; each element is a list with integer
+// vectors "t" (positions), "f" (first mismatch, 0 if none) and "s" (second
+// mismatch, 0 if none). If the file cannot be opened a message is printed
+// and an empty list is returned.
+SEXP read_eland_mismatches(SEXP filename) {
+
+  const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG  
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // main data vectors, parallel per chromosome
+  vector< vector<int> > pos;
+  vector< vector<int> > mm1; // position of the first mismatch (or 0 for none)
+  vector< vector<int> > mm2; // position of the second mismatch
+
+  // chromosome map: name -> index into the vectors above
+  hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+  int fcount=0;
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { cout<<"can't open input file \""<<fname<<"\"\n"; }
+  else {
+    // only read from / close the stream when it was actually opened
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    while(get_a_line(f,line)) {
+#ifdef DEBUG  
+      Rprintf("line: %s\n",line.c_str());
+#endif
+      // materialise the tokens so that short lines are skipped rather than
+      // read past the end of the tokenizer
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<9) { continue; }
+
+      const string& seq=fields[1];
+      const int seqlen=(int)seq.size();
+      // keep unique matches only
+      if(fields[2].empty() || fields[2][0]!='U') { continue; }
+
+      // extract chromosome name from the reference field
+      string chr=fields[6];
+      int chrp=chr.find("chr");
+      int pp=chr.find('.');
+      // substr throws when the start offset lies beyond the string, which
+      // happens for very short names without "chr"; keep those unchanged
+      if((size_t)(chrp+3)<=chr.size()) {
+        chr=chr.substr(chrp+3,pp-chrp-3);
+      }
+
+      int fpos=atoi(fields[7].c_str());
+      int nstrand=0;
+      if(fields[8]=="R") {
+        fpos=-1*(fpos+seqlen-1);
+        nstrand=1;
+      }
+
+      // field 9 is not used; mismatch descriptors follow it when present
+      int nm1=0; int nm2=0;
+      if(fields.size()>10) {
+        const string& nms=fields[10];
+        nm1=atoi(nms.substr(0,nms.size()-1).c_str());
+        if(nstrand) { nm1=seqlen-nm1+1; }
+      }
+      if(fields.size()>11) {
+        const string& nms=fields[11];
+        nm2=atoi(nms.substr(0,nms.size()-1).c_str());
+        if(nstrand) { nm2=seqlen-nm2+1; }
+      }
+
+      // determine the chromosome index, registering unseen chromosomes
+      int cind=-1;
+      hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+      if(li==cind_map.end()) {
+        cind=cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        pos.push_back(vector<int>());
+        mm1.push_back(vector<int>());
+        mm2.push_back(vector<int>());
+#ifdef DEBUG  
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (mm1[cind]).push_back(nm1);
+      (mm2[cind]).push_back(nm2);
+#ifdef DEBUG  
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm1=%d, nm2=%d\n",chr.c_str(),cind,fpos,nm1,nm2);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  SEXP chnames;
+  PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+  for(size_t ci=0;ci<cnames.size();++ci) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+
+  SEXP ans;
+  PROTECT(ans = allocVector(VECSXP, cnames.size()));
+  for(size_t ci=0;ci<pos.size();++ci) {
+    const size_t ntags=pos[ci].size();
+
+    SEXP dv,dnames_R,tv,nv,lv;
+    PROTECT(dnames_R = allocVector(STRSXP, 3));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("f"));
+    SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+
+    PROTECT(tv=allocVector(INTSXP,ntags));
+    PROTECT(nv=allocVector(INTSXP,ntags));
+    PROTECT(lv=allocVector(INTSXP,ntags));
+    copy(pos[ci].begin(),pos[ci].end(),INTEGER(tv));
+    copy(mm1[ci].begin(),mm1[ci].end(),INTEGER(nv));
+    copy(mm2[ci].begin(),mm2[ci].end(),INTEGER(lv));
+
+    PROTECT(dv = allocVector(VECSXP, 3));
+    SET_VECTOR_ELT(dv, 0, tv);
+    SET_VECTOR_ELT(dv, 1, nv);
+    SET_VECTOR_ELT(dv, 2, lv);
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+
+    SET_VECTOR_ELT(ans, ci, dv);
+    // everything allocated in this iteration is reachable through ans now;
+    // release it so the protection stack stays bounded
+    UNPROTECT(5);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2); // chnames, ans
+  return(ans);
+}
+
+
+  // read in regular eland files, adjusting the negative strand coordinate by sequence length
+  //
+  // filename           - character vector; its first element is the path.
+  // read_tag_names_R   - integer flag; when non-zero the tag names (field 0)
+  //                      are returned as an extra "s" component.
+  // eland_tag_length_R - integer; when positive, caps the read length used
+  //                      for the negative-strand adjustment.
+  //
+  // Lines are split on tabs (empty fields kept). Fields used: 0 = tag name,
+  // 1 = sequence, 2 = match code (only codes starting with 'U' are kept; the
+  // digits after 'U' give the mismatch count), 6 = chromosome, 7 = position,
+  // 8 = strand (a leading 'R' turns the position into -(pos+len-1)).
+  // Lines with fewer than nine fields are skipped.
+  //
+  // Returns a list named by chromosome; each element is a list with integer
+  // vectors "t" (positions), "n" (mismatches) and, optionally, a character
+  // vector "s". If the file cannot be opened a message is printed and an
+  // empty list is returned.
+  SEXP read_eland(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) {
+
+  const char* fname=CHAR(asChar(filename));
+  int read_names=*(INTEGER(read_tag_names_R));
+  int eland_tag_length=*(INTEGER(eland_tag_length_R));
+#ifdef DEBUG  
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // main data vectors, parallel per chromosome
+  vector< vector<int> > pos;
+  vector< vector<int> > posnm; // number of mismatches
+  vector< vector<string> > tagnames;
+
+  // chromosome map: name -> index into the vectors above
+  hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { cout<<"can't open input file \""<<fname<<"\"\n"; }
+  else {
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int fcount=0;
+    while(get_a_line(f,line)) {
+#ifdef DEBUG  
+      Rprintf("line: %s\n",line.c_str());
+#endif
+      // materialise the tokens so that short lines are skipped rather than
+      // read past the end of the tokenizer
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<9) { continue; }
+
+      const string& tagname=fields[0];
+      int len=fields[1].size();
+      // adjust probe length if eland length limit was specified
+      if(eland_tag_length>0 && len>eland_tag_length) {
+        len=eland_tag_length;
+      }
+      const string& str_nm=fields[2];
+      if(str_nm.empty() || str_nm[0]!='U') { continue; }
+      int nm=atoi(str_nm.c_str()+1);
+
+      const string& chr=fields[6];
+      int fpos=atoi(fields[7].c_str());
+      const string& str_strand=fields[8];
+      if(!str_strand.empty() && str_strand[0]=='R') {
+        fpos=-1*(fpos+len-1);
+      }
+
+      // determine the chromosome index, registering unseen chromosomes
+      int cind=-1;
+      hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+      if(li==cind_map.end()) {
+        cind=cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG  
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      if(read_names) {
+        (tagnames[cind]).push_back(tagname);
+      }
+#ifdef DEBUG  
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  SEXP chnames;
+  PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+  for(size_t ci=0;ci<cnames.size();++ci) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+
+  SEXP ans;
+  PROTECT(ans = allocVector(VECSXP, cnames.size()));
+  for(size_t ci=0;ci<pos.size();++ci) {
+    const size_t ntags=pos[ci].size();
+    int nprot=0; // protections taken in this iteration
+
+    SEXP dv,dnames_R;
+    PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); nprot++;
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+    }
+
+    SEXP tv,nv;
+    SEXP sv=R_NilValue;
+    PROTECT(tv=allocVector(INTSXP,ntags));   nprot++;
+    PROTECT(nv=allocVector(INTSXP,ntags));   nprot++;
+    copy(pos[ci].begin(),pos[ci].end(),INTEGER(tv));
+    copy(posnm[ci].begin(),posnm[ci].end(),INTEGER(nv));
+
+    if(read_names) {
+      PROTECT(sv=allocVector(STRSXP,ntags));   nprot++;
+      for(size_t i=0;i<tagnames[ci].size();++i) {
+        SET_STRING_ELT(sv,i,mkChar(tagnames[ci][i].c_str()));
+      }
+    }
+
+    PROTECT(dv = allocVector(VECSXP, 2+read_names));   nprot++;
+    SET_VECTOR_ELT(dv, 0, tv);
+    SET_VECTOR_ELT(dv, 1, nv);
+    if(read_names) {
+      SET_VECTOR_ELT(dv, 2, sv);
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+
+    SET_VECTOR_ELT(ans, ci, dv);
+    // everything allocated in this iteration is reachable through ans now;
+    // release it so the protection stack stays bounded
+    UNPROTECT(nprot);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2); // chnames, ans
+  return(ans);
+}
+
+
+
+  // read in extended eland files, adjusting the negative strand coordinate by sequence length
+  //
+  // filename           - character vector; its first element is the path.
+  // read_tag_names_R   - integer flag; when non-zero a tag name built from
+  //                      fields 0,1,2,4,5 (joined with '.') is returned as an
+  //                      extra "s" component.
+  // eland_tag_length_R - integer; when positive, caps the read length used
+  //                      for the negative-strand adjustment.
+  //
+  // Lines are split on tabs (empty fields kept). Fields used: 8 = sequence,
+  // 10 + 11 = chromosome and contig (concatenated), 12 = position (lines
+  // with an empty position are skipped), 13 = strand (a leading 'R' turns
+  // the position into -(pos+len-1)), 14 = match descriptor, whose non-digit
+  // characters are counted as mismatches. Lines with fewer than thirteen
+  // fields are skipped; missing fields 13/14 are treated as empty.
+  //
+  // Returns a list named by chromosome; each element is a list with integer
+  // vectors "t" (positions), "n" (mismatches) and, optionally, a character
+  // vector "s". If the file cannot be opened a message is printed and an
+  // empty list is returned.
+  SEXP read_eland_extended(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) {
+
+  const char* fname=CHAR(asChar(filename));
+  int read_names=*(INTEGER(read_tag_names_R));
+  int eland_tag_length=*(INTEGER(eland_tag_length_R));
+#ifdef DEBUG  
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // main data vectors, parallel per chromosome
+  vector< vector<int> > pos;
+  vector< vector<int> > posnm; // number of mismatches
+  vector< vector<string> > tagnames;
+
+  // chromosome map: name -> index into the vectors above
+  hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { cout<<"can't open input file \""<<fname<<"\"\n"; }
+  else {
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int fcount=0;
+    while(get_a_line(f,line)) {
+#ifdef DEBUG  
+      Rprintf("line: %s\n",line.c_str());
+#endif
+      // materialise the tokens so that short lines are skipped rather than
+      // read past the end of the tokenizer
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<13) { continue; }
+
+      // machine.run.lane.x.y
+      string tagname=fields[0]+"."+fields[1]+"."+fields[2]+"."+fields[4]+"."+fields[5];
+
+      string chr=fields[10]+fields[11];
+
+      int len=fields[8].size();
+      // adjust probe length if eland length limit was specified
+      if(eland_tag_length>0 && len>eland_tag_length) {
+        len=eland_tag_length;
+      }
+
+      const string& str_pos=fields[12];
+      if(str_pos.size()<1) { continue; }
+      int fpos=atoi(str_pos.c_str());
+
+      if(fields.size()>13 && !fields[13].empty() && fields[13][0]=='R') {
+        fpos=-1*(fpos+len-1);
+      }
+
+      // count non-digit characters of the match descriptor
+      int nm=0;
+      if(fields.size()>14) {
+        const string& str_nm=fields[14];
+        for(size_t i=0;i<str_nm.size();i++) {
+          if(!isdigit((unsigned char)str_nm[i])) { nm++; }
+        }
+      }
+
+      // determine the chromosome index, registering unseen chromosomes
+      int cind=-1;
+      hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+      if(li==cind_map.end()) {
+        cind=cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG  
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      if(read_names) {
+        (tagnames[cind]).push_back(tagname);
+      }
+#ifdef DEBUG  
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  SEXP chnames;
+  PROTECT(chnames = allocVector(STRSXP, cnames.size()));
+  for(size_t ci=0;ci<cnames.size();++ci) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+
+  SEXP ans;
+  PROTECT(ans = allocVector(VECSXP, cnames.size()));
+  for(size_t ci=0;ci<pos.size();++ci) {
+    const size_t ntags=pos[ci].size();
+    int nprot=0; // protections taken in this iteration
+
+    SEXP dv,dnames_R;
+    PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); nprot++;
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+    }
+
+    SEXP tv,nv;
+    SEXP sv=R_NilValue;
+    PROTECT(tv=allocVector(INTSXP,ntags));   nprot++;
+    PROTECT(nv=allocVector(INTSXP,ntags));   nprot++;
+    copy(pos[ci].begin(),pos[ci].end(),INTEGER(tv));
+    copy(posnm[ci].begin(),posnm[ci].end(),INTEGER(nv));
+
+    if(read_names) {
+      PROTECT(sv=allocVector(STRSXP,ntags));   nprot++;
+      for(size_t i=0;i<tagnames[ci].size();++i) {
+        SET_STRING_ELT(sv,i,mkChar(tagnames[ci][i].c_str()));
+      }
+    }
+
+    PROTECT(dv = allocVector(VECSXP, 2+read_names));   nprot++;
+    SET_VECTOR_ELT(dv, 0, tv);
+    SET_VECTOR_ELT(dv, 1, nv);
+    if(read_names) {
+      SET_VECTOR_ELT(dv, 2, sv);
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+
+    SET_VECTOR_ELT(ans, ci, dv);
+    // everything allocated in this iteration is reachable through ans now;
+    // release it so the protection stack stays bounded
+    UNPROTECT(nprot);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2); // chnames, ans
+  return(ans);
+}
+
+
+  // Read an ELAND "multi" file and return the uniquely placed tags grouped by chromosome.
+  //
+  // Arguments (R objects):
+  //   filename           - character; its first element is the path of the file to read
+  //   read_tag_names_R   - integer flag; non-zero requests the read names as well
+  //   eland_tag_length_R - integer; when positive, caps the read length used to shift
+  //                        reverse-strand coordinates
+  //
+  // Each line is split on blanks/tabs into read name, sequence, match spec and match list.
+  // Lines whose match spec is "NM" or "QC" are skipped.  Otherwise the spec is read as
+  // "n0:n1:n2" (hit counts for 0, 1 and 2 mismatches): the first class with a non-zero
+  // count is selected (class 2 if all are zero) and the read is dropped as soon as a
+  // count greater than one is seen.  Missing counts are treated as zero.
+  // The match list is a comma-separated series of "chr:<pos><F|R><mismatches>" entries; an
+  // entry without "chr:" inherits the chromosome of the closest preceding entry that had
+  // one.  The read is kept only if exactly one entry belongs to the selected class.
+  //
+  // Returns a list named by chromosome.  Every element is a list with
+  //   t - integer tag positions; reverse-strand tags are stored as -(pos+len-1)
+  //   n - integer number of mismatches
+  //   s - read names (present only when read_tag_names_R is non-zero)
+  // If the file cannot be opened a message is printed and an empty list is returned.
+SEXP read_eland_multi(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) {
+
+#ifdef DEBUG
+  Rprintf("read_eland_muti() : start\n");
+#endif
+  const char* fname=CHAR(asChar(filename));
+  // normalised to 0/1 because the flag is added to vector lengths below
+  int read_names=(*(INTEGER(read_tag_names_R))!=0) ? 1 : 0;
+  int eland_tag_length=*(INTEGER(eland_tag_length_R));
+#ifdef DEBUG
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // per-chromosome data, indexed by chromosome index
+  vector< vector<int> > pos;          // signed tag positions
+  vector< vector<int> > posnm;        // number of mismatches
+  vector< vector<string> > tagnames;  // read names (filled only when read_names is set)
+
+  // chromosome name -> index, and index -> name
+  typedef hash_map<string, int, hash<string>,equal_to<string> > chrMapType;
+  chrMapType cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep(" \t","");
+  boost::char_separator<char> comsep(",","",boost::keep_empty_tokens);
+  boost::char_separator<char> colsep(":","",boost::keep_empty_tokens);
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { Rprintf("can't open input file \"%s\"\n",fname); }
+  else {
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int nline=0;
+    int fcount=0;
+    while(get_a_line(f,line)) {
+      nline++;
+      // chomp trailing end-of-line characters (a line made only of them becomes empty)
+      size_t elpos = line.find_last_not_of("\r\n");
+      line.erase(elpos==string::npos ? 0 : elpos+1);
+#ifdef DEBUG
+      Rprintf("line %d: %s\n",nline,line.c_str());
+#endif
+
+      // materialise the fields so that truncated lines are detected instead of
+      // dereferencing a past-the-end tokenizer iterator
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.empty()) { continue; }
+      if(fields.size()<3) {
+        Rprintf("ERROR: line=%d: expected at least 3 fields, found %d\n",nline,(int)fields.size());
+        continue;
+      }
+      const string& tagname=fields[0];
+      const string& sequence=fields[1];
+      const string& mspec=fields[2];
+
+      if(mspec=="NM" || mspec=="QC") { continue; }
+#ifdef DEBUG
+      Rprintf("parsing out spec \"%s\" : ",mspec.c_str());
+#endif
+
+      // parse out match spec: walk the 0/1/2-mismatch hit counts
+      tokType stok(mspec, colsep);
+      vector<string> counts(stok.begin(),stok.end());
+      int nm=0;
+      bool rejected=false;
+      for(int k=0;k<3;k++) {
+        int cnt=(k<(int)counts.size()) ? atoi(counts[k].c_str()) : 0;
+        if(cnt>1) {
+#ifdef DEBUG
+          Rprintf("rejected for nm%d\n",k);
+#endif
+          rejected=true;
+          break;
+        }
+        nm=k;
+        if(cnt!=0) { break; }
+      }
+      if(rejected) { continue; }
+
+#ifdef DEBUG
+      Rprintf("accepted (nm=%d)\n",nm);
+#endif
+      if(fields.size()<4) {
+        Rprintf("ERROR: line=%d: match position field is missing\n",nline);
+        continue;
+      }
+      const string& mpos=fields[3];
+
+      int npos=0;
+      vector<string> mposc; // chromosomes of the matches in the selected mismatch class
+      vector<int> mposp;    // signed positions of those matches
+      tokType ptok(mpos, comsep);
+      string prevchr;       // chromosome of the last entry that named one
+      for(tokType::iterator psit=ptok.begin();psit!=ptok.end();++psit) {
+        string cpos=*psit;
+        npos++;
+        // the shortest usable entry is <digit><strand><mismatches>
+        if(cpos.size()<3) {
+          Rprintf("ERROR: line=%d, match %d is too short: \"%s\"\n",nline,npos,cpos.c_str());
+          continue;
+        }
+        // remember the chromosome of every entry, not only of the accepted ones,
+        // so that later entries without a chromosome can inherit it
+        size_t colpos=cpos.find(":");
+        if(colpos!=string::npos) { prevchr=cpos.substr(0,colpos); }
+
+        // last character is the mismatch count of this entry
+        char lc=cpos[cpos.size()-1];
+        int mnm=(lc>='0' && lc<='9') ? (lc-'0') : -1;
+        if(mnm!=nm) { continue; }
+
+        int strand;
+        char sc=cpos[cpos.size()-2];
+        if(sc=='R') {
+          strand=-1;
+        } else if(sc=='F') {
+          strand=1;
+        } else {
+          Rprintf("ERROR: line=%d, match %d specifies an invalid strand %c\n",nline,npos,sc);
+          continue;
+        }
+
+        string chr,str_pos;
+        if(colpos==string::npos) {
+          if(prevchr.empty()) {
+            Rprintf("ERROR: line=%d, match %d does not contain chromosome separator: \"%s\"\n",nline,npos,cpos.c_str());
+            continue;
+          }
+          chr=prevchr;
+          str_pos=cpos.substr(0,cpos.size()-2);
+        } else {
+          chr=cpos.substr(0,colpos);
+          // colpos <= size-3 here because the strand character was validated above
+          str_pos=cpos.substr(colpos+1,cpos.size()-3-colpos);
+        }
+#ifdef DEBUG
+        Rprintf("\"%s\" : chr=%s, pos=%s, strand=%d\n",cpos.c_str(),chr.c_str(),str_pos.c_str(),strand);
+#endif
+        mposc.push_back(chr);
+        mposp.push_back(strand*atoi(str_pos.c_str()));
+      }
+
+      if(mposc.size()!=1) {
+        if(mposc.empty()) {
+          Rprintf("ERROR: line=%d: no %d-mismatch matches were found in \"%s\"\n",nline,nm,mpos.c_str());
+        } else {
+          Rprintf("ERROR: line=%d: more than one (%d) %d-mismatch matches were found in \"%s\"\n",nline,(int)mposc.size(),nm,mpos.c_str());
+        }
+        continue;
+      }
+      const string& chr=mposc[0];
+      int fpos=mposp[0];
+
+      int len=(int)sequence.size();
+      // adjust probe length if eland length limit was specified
+      if(eland_tag_length>0 && len>eland_tag_length) {
+        len=eland_tag_length;
+      }
+
+      // shift reverse-strand tags by the read length, keeping the negative sign
+      if(fpos<0) {
+        fpos=-1*(-1*fpos+len-1);
+      }
+
+      // determine the chromosome index
+      chrMapType::const_iterator li=cind_map.find(chr);
+      int cind=-1;
+      if(li==cind_map.end()) {
+        // register new chromosome
+        cind=(int)cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        // allocate new pos vector
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      if(read_names) {
+        (tagnames[cind]).push_back(tagname);
+      }
+#ifdef DEBUG
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  int nchr=(int)cnames.size();
+  SEXP chnames,ans;
+  PROTECT(chnames = allocVector(STRSXP, nchr));
+  for(int ci=0;ci<nchr;ci++) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+  PROTECT(ans = allocVector(VECSXP, nchr));
+
+  for(int ci=0;ci<nchr;ci++) {
+    const vector<int>& cpos=pos[ci];
+    const vector<int>& cnm=posnm[ci];
+    int ntags=(int)cpos.size();
+
+    // dv is protected first; every component is stored into it right after
+    // allocation, so it needs no protection of its own
+    SEXP dv,dnames_R;
+    PROTECT(dv = allocVector(VECSXP, 2+read_names));
+    PROTECT(dnames_R = allocVector(STRSXP, 2+read_names));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+    }
+
+    SEXP tv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 0, tv);
+    SEXP nv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 1, nv);
+    int* i_tv=INTEGER(tv);
+    int* i_nv=INTEGER(nv);
+    for(int i=0;i<ntags;i++) {
+      i_tv[i]=cpos[i];
+      i_nv[i]=cnm[i];
+    }
+    if(read_names) {
+      const vector<string>& ctags=tagnames[ci];
+      SEXP sv=allocVector(STRSXP,ntags);
+      SET_VECTOR_ELT(dv, 2, sv);
+      for(int i=0;i<ntags && i<(int)ctags.size();i++) {
+        SET_STRING_ELT(sv,i,mkChar(ctags[i].c_str()));
+      }
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+    SET_VECTOR_ELT(ans, ci, dv);
+    // dv is now reachable from ans; releasing here keeps the protection stack
+    // depth constant no matter how many chromosomes the file contains
+    UNPROTECT(2);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2);
+  return(ans);
+}
+
+
+  // Read tab-separated bowtie alignment output and return the tags grouped by chromosome.
+  //
+  // Arguments (R objects):
+  //   filename         - character; its first element is the path of the file to read.
+  //                      When built with HAVE_LIBBZ2 and the name contains ".bz2" the
+  //                      file is read through libbzip2.
+  //   read_tag_names_R - integer flag; non-zero requests the read names as well
+  //
+  // Fields used (0-based, tab separated, empty fields kept): 0 read name, 1 strand,
+  // 2 chromosome, 3 position, 4 sequence, 7 comma-separated mismatch descriptors.
+  // Lines with fewer than five fields are skipped; a missing mismatch field counts as
+  // zero mismatches.
+  //
+  // Returns a list named by chromosome.  Every element is a list with
+  //   t - integer tag positions; '-' strand tags are stored as -(pos+len-1)
+  //   n - integer number of mismatches (number of mismatch descriptors)
+  //   s - read names (present only when read_tag_names_R is non-zero)
+  // If the file cannot be opened a message is printed and an empty list is returned.
+  SEXP read_bowtie(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+  Rprintf("start\n");
+#endif
+  const char* fname=CHAR(asChar(filename));
+  // normalised to 0/1 because the flag is added to vector lengths below
+  int read_names=(*(INTEGER(read_tag_names_R))!=0) ? 1 : 0;
+#ifdef DEBUG
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // per-chromosome data, indexed by chromosome index
+  vector< vector<int> > pos;          // signed tag positions
+  vector< vector<int> > posnm;        // number of mismatches
+  vector< vector<string> > tagnames;  // read names (filled only when read_names is set)
+
+  // chromosome name -> index, and index -> name
+  typedef hash_map<string, int, hash<string>,equal_to<string> > chrMapType;
+  chrMapType cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { Rprintf("can't open input file \"%s\"\n",fname);
+  } else {
+    bool readable=true;
+#ifdef HAVE_LIBBZ2
+    // b stays NULL for plain files so that it is never closed uninitialised
+    BZFILE* b=NULL;
+    int bzerror=BZ_OK;
+
+    int bz2file=0;
+    if(strstr(fname,".bz2")) {
+      bz2file=1;
+      b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+      if (bzerror != BZ_OK)  {
+        Rprintf("bzerror=%d\n",bzerror);
+        readable=false; // do not try to read through a handle that failed to open
+      }
+    }
+#endif
+
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int fcount=0;
+#ifdef HAVE_LIBBZ2
+    while(readable && get_a_line(f,b,bz2file,line)) {
+#else
+    while(readable && get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG
+      Rprintf("line: %s\n",line.c_str());
+#endif
+
+      // materialise the fields so that truncated lines are detected instead of
+      // dereferencing a past-the-end tokenizer iterator
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<5) {
+        if(!line.empty()) {
+          Rprintf("ERROR: skipping line with %d fields (expected at least 5): \"%s\"\n",(int)fields.size(),line.c_str());
+        }
+        continue;
+      }
+      const string& tagname=fields[0];
+      const string& str_strand=fields[1];
+      const string& chr=fields[2];
+      int fpos=atoi(fields[3].c_str());
+      const string& sequence=fields[4];
+      // fields 5 and 6 are not used
+      const string mm=(fields.size()>7) ? fields[7] : string();
+
+      int len=(int)sequence.size();
+      if(!str_strand.empty() && str_strand[0]=='-') {
+        fpos=-1*(fpos+len-1);
+      }
+
+      // determine number of mismatches: one per comma-separated descriptor
+      int nm=0;
+      if(!mm.empty()) {
+        nm=1;
+        for(string::size_type tp=mm.find(',');tp!=string::npos;tp=mm.find(',',tp+1)) {
+          ++nm;
+        }
+      }
+
+      // determine the chromosome index
+      chrMapType::const_iterator li=cind_map.find(chr);
+      int cind=-1;
+      if(li==cind_map.end()) {
+        // register new chromosome
+        cind=(int)cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        // allocate new pos vector
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      if(read_names) {
+        (tagnames[cind]).push_back(tagname);
+      }
+#ifdef DEBUG
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+
+#ifdef HAVE_LIBBZ2
+    if(b) { BZ2_bzReadClose( &bzerror, b); }
+#endif
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  int nchr=(int)cnames.size();
+  SEXP chnames,ans;
+  PROTECT(chnames = allocVector(STRSXP, nchr));
+  for(int ci=0;ci<nchr;ci++) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+  PROTECT(ans = allocVector(VECSXP, nchr));
+
+  for(int ci=0;ci<nchr;ci++) {
+    const vector<int>& cpos=pos[ci];
+    const vector<int>& cnm=posnm[ci];
+    int ntags=(int)cpos.size();
+
+    // dv is protected first; every component is stored into it right after
+    // allocation, so it needs no protection of its own
+    SEXP dv,dnames_R;
+    PROTECT(dv = allocVector(VECSXP, 2+read_names));
+    PROTECT(dnames_R = allocVector(STRSXP, 2+read_names));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+    }
+
+    SEXP tv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 0, tv);
+    SEXP nv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 1, nv);
+    int* i_tv=INTEGER(tv);
+    int* i_nv=INTEGER(nv);
+    for(int i=0;i<ntags;i++) {
+      i_tv[i]=cpos[i];
+      i_nv[i]=cnm[i];
+    }
+    if(read_names) {
+      const vector<string>& ctags=tagnames[ci];
+      SEXP sv=allocVector(STRSXP,ntags);
+      SET_VECTOR_ELT(dv, 2, sv);
+      for(int i=0;i<ntags && i<(int)ctags.size();i++) {
+        SET_STRING_ELT(sv,i,mkChar(ctags[i].c_str()));
+      }
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+    SET_VECTOR_ELT(ans, ci, dv);
+    // dv is now reachable from ans; releasing here keeps the protection stack
+    // depth constant no matter how many chromosomes the file contains
+    UNPROTECT(2);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2);
+  return(ans);
+}
+
+
+  // Read helicos tab-separated alignment output (regular or bz2) and return the tags
+  // grouped by chromosome.
+  //
+  // Arguments (R objects):
+  //   filename         - character; its first element is the path of the file to read.
+  //                      When built with HAVE_LIBBZ2 and the name contains ".bz2" the
+  //                      file is read through libbzip2.
+  //   read_tag_names_R - integer flag; non-zero requests the read names as well
+  //
+  // Lines starting with '#' or with "Reference_ID" (the header) are skipped.
+  // Fields used (0-based, tab separated, empty fields kept): 0 chromosome, 1 read name,
+  // 2 start, 3 end, 4 tag start, 5 tag end, 8 deletions, 9 insertions, 10 substitutions,
+  // 11 strand.  Lines with fewer than twelve fields are skipped.
+  //
+  // Returns a list named by chromosome.  Every element is a list with
+  //   t - integer tag positions: start for '+' tags, -end for '-' tags
+  //   n - integer number of mismatches (deletions + insertions + substitutions)
+  //   l - integer match length (tag end - tag start)
+  //   s - read names (present only when read_tag_names_R is non-zero)
+  // If the file cannot be opened a message is printed and an empty list is returned.
+  SEXP read_helicostabf(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+  Rprintf("start\n");
+#endif
+  const char* fname=CHAR(asChar(filename));
+  // normalised to 0/1 because the flag is added to vector lengths below
+  int read_names=(*(INTEGER(read_tag_names_R))!=0) ? 1 : 0;
+#ifdef DEBUG
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // per-chromosome data, indexed by chromosome index
+  vector< vector<int> > pos;          // signed tag positions
+  vector< vector<int> > posnm;        // number of mismatches
+  vector< vector<int> > poslen;       // length of the match
+  vector< vector<string> > tagnames;  // read names (filled only when read_names is set)
+
+  // chromosome name -> index, and index -> name
+  typedef hash_map<string, int, hash<string>,equal_to<string> > chrMapType;
+  chrMapType cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { Rprintf("can't open input file \"%s\"\n",fname);
+  } else {
+    bool readable=true;
+#ifdef HAVE_LIBBZ2
+    // b stays NULL for plain files so that it is never closed uninitialised
+    BZFILE* b=NULL;
+    int bzerror=BZ_OK;
+
+    int bz2file=0;
+    if(strstr(fname,".bz2")) {
+      bz2file=1;
+      b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+      if (bzerror != BZ_OK)  {
+        Rprintf("bzerror=%d\n",bzerror);
+        readable=false; // do not try to read through a handle that failed to open
+      }
+    }
+#endif
+
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int fcount=0;
+    int nlines=0;
+#ifdef HAVE_LIBBZ2
+    while(readable && get_a_line(f,b,bz2file,line)) {
+#else
+    while(readable && get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG
+      Rprintf("line: %s\n",line.c_str());
+#endif
+      nlines++;
+      // skip comments
+      if(!line.empty() && line[0]=='#') { continue; }
+      if(line.compare(0,12,"Reference_ID")==0) {
+#ifdef DEBUG
+        Rprintf("matched header on line %d\n",nlines);
+#endif
+        continue;
+      }
+
+      // materialise the fields so that truncated lines are detected instead of
+      // dereferencing a past-the-end tokenizer iterator
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<12) {
+        if(!line.empty()) {
+          Rprintf("ERROR: line=%d: skipping line with %d fields (expected at least 12)\n",nlines,(int)fields.size());
+        }
+        continue;
+      }
+      const string& chr=fields[0];
+      const string& tagname=fields[1];
+      const string& str_startpos=fields[2];
+      const string& str_endpos=fields[3];
+      const string& str_tstart=fields[4];
+      const string& str_tend=fields[5];
+      // fields 6 and 7 are not used
+      const string& str_ndel=fields[8];
+      const string& str_nins=fields[9];
+      const string& str_nsub=fields[10];
+      const string& str_strand=fields[11];
+
+      int len=atoi(str_tend.c_str())-atoi(str_tstart.c_str());
+
+      int fpos;
+      if(!str_strand.empty() && str_strand[0]=='-') {
+        fpos=-1*atoi(str_endpos.c_str());
+      } else {
+        fpos=atoi(str_startpos.c_str());
+      }
+
+      // determine number of mismatches
+      int nm=atoi(str_ndel.c_str())+atoi(str_nins.c_str())+atoi(str_nsub.c_str());
+
+      // determine the chromosome index
+      chrMapType::const_iterator li=cind_map.find(chr);
+      int cind=-1;
+      if(li==cind_map.end()) {
+        // register new chromosome
+        cind=(int)cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        // allocate new pos vector
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        poslen.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      (poslen[cind]).push_back(len);
+      if(read_names) {
+        (tagnames[cind]).push_back(tagname);
+      }
+#ifdef DEBUG
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d\n",chr.c_str(),cind,fpos,nm);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+
+#ifdef HAVE_LIBBZ2
+    if(b) { BZ2_bzReadClose( &bzerror, b); }
+#endif
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  int nchr=(int)cnames.size();
+  SEXP chnames,ans;
+  PROTECT(chnames = allocVector(STRSXP, nchr));
+  for(int ci=0;ci<nchr;ci++) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+  PROTECT(ans = allocVector(VECSXP, nchr));
+
+  for(int ci=0;ci<nchr;ci++) {
+    const vector<int>& cpos=pos[ci];
+    const vector<int>& cnm=posnm[ci];
+    const vector<int>& clen=poslen[ci];
+    int ntags=(int)cpos.size();
+
+    // dv is protected first; every component is stored into it right after
+    // allocation, so it needs no protection of its own
+    SEXP dv,dnames_R;
+    PROTECT(dv = allocVector(VECSXP, 3+read_names));
+    PROTECT(dnames_R = allocVector(STRSXP, 3+read_names));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 3, mkChar("s"));
+    }
+
+    SEXP tv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 0, tv);
+    SEXP nv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 1, nv);
+    SEXP lv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 2, lv);
+    int* i_tv=INTEGER(tv);
+    int* i_nv=INTEGER(nv);
+    int* i_lv=INTEGER(lv);
+    for(int i=0;i<ntags;i++) {
+      i_tv[i]=cpos[i];
+      i_nv[i]=cnm[i];
+      i_lv[i]=clen[i];
+    }
+    if(read_names) {
+      const vector<string>& ctags=tagnames[ci];
+      SEXP sv=allocVector(STRSXP,ntags);
+      SET_VECTOR_ELT(dv, 3, sv);
+      for(int i=0;i<ntags && i<(int)ctags.size();i++) {
+        SET_STRING_ELT(sv,i,mkChar(ctags[i].c_str()));
+      }
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+    SET_VECTOR_ELT(ans, ci, dv);
+    // dv is now reachable from ans; releasing here keeps the protection stack
+    // depth constant no matter how many chromosomes the file contains
+    UNPROTECT(2);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2);
+  return(ans);
+}
+
+
+
+  // Read the text version of a maq map and return the tags grouped by chromosome.
+  //
+  // Arguments (R objects):
+  //   filename         - character; its first element is the path of the file to read
+  //   read_tag_names_R - integer flag; non-zero requests the read names as well
+  //
+  // Fields used (0-based, tab separated, empty fields kept): 0 read name, 1 chromosome,
+  // 2 position, 3 strand, 9 number of mismatches, 13 read length.
+  // Lines with fewer than fourteen fields are skipped.
+  //
+  // Returns a list named by chromosome.  Every element is a list with
+  //   t - integer tag positions; '-' strand tags are stored as -(pos+len-1)
+  //   n - integer number of mismatches
+  //   s - read names (present only when read_tag_names_R is non-zero)
+  // If the file cannot be opened a message is printed and an empty list is returned.
+  SEXP read_maqmap(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG
+  Rprintf("start\n");
+#endif
+  const char* fname=CHAR(asChar(filename));
+  // normalised to 0/1 because the flag is added to vector lengths below
+  int read_names=(*(INTEGER(read_tag_names_R))!=0) ? 1 : 0;
+#ifdef DEBUG
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // per-chromosome data, indexed by chromosome index
+  vector< vector<int> > pos;          // signed tag positions
+  vector< vector<int> > posnm;        // number of mismatches
+  vector< vector<string> > tagnames;  // read names (filled only when read_names is set)
+
+  // chromosome name -> index, and index -> name
+  typedef hash_map<string, int, hash<string>,equal_to<string> > chrMapType;
+  chrMapType cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep("\t","",boost::keep_empty_tokens);
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { Rprintf("can't open input file \"%s\"\n",fname); }
+  else {
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int fcount=0;
+    while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+      Rprintf("line: %s\n",line.c_str());
+#endif
+
+      // materialise the fields so that truncated lines are detected instead of
+      // dereferencing a past-the-end tokenizer iterator
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.size()<14) {
+        if(!line.empty()) {
+          Rprintf("ERROR: skipping line with %d fields (expected at least 14): \"%s\"\n",(int)fields.size(),line.c_str());
+        }
+        continue;
+      }
+      const string& tagname=fields[0];
+      const string& chr=fields[1];
+      int fpos=atoi(fields[2].c_str());
+      const string& str_strand=fields[3];
+      // fields 4-8 and 10-12 are not used
+      int nm=atoi(fields[9].c_str());
+      int len=atoi(fields[13].c_str());
+
+      if(!str_strand.empty() && str_strand[0]=='-') {
+        fpos=-1*(fpos+len-1);
+      }
+
+      // determine the chromosome index
+      chrMapType::const_iterator li=cind_map.find(chr);
+      int cind=-1;
+      if(li==cind_map.end()) {
+        // register new chromosome
+        cind=(int)cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        // allocate new pos vector
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+        if(read_names) {
+          tagnames.push_back(vector<string>());
+        }
+#ifdef DEBUG
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+      if(read_names) {
+        (tagnames[cind]).push_back(tagname);
+      }
+#ifdef DEBUG
+      Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  int nchr=(int)cnames.size();
+  SEXP chnames,ans;
+  PROTECT(chnames = allocVector(STRSXP, nchr));
+  for(int ci=0;ci<nchr;ci++) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+  PROTECT(ans = allocVector(VECSXP, nchr));
+
+  for(int ci=0;ci<nchr;ci++) {
+    const vector<int>& cpos=pos[ci];
+    const vector<int>& cnm=posnm[ci];
+    int ntags=(int)cpos.size();
+
+    // dv is protected first; every component is stored into it right after
+    // allocation, so it needs no protection of its own
+    SEXP dv,dnames_R;
+    PROTECT(dv = allocVector(VECSXP, 2+read_names));
+    PROTECT(dnames_R = allocVector(STRSXP, 2+read_names));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+    if(read_names) {
+      SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+    }
+
+    SEXP tv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 0, tv);
+    SEXP nv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 1, nv);
+    int* i_tv=INTEGER(tv);
+    int* i_nv=INTEGER(nv);
+    for(int i=0;i<ntags;i++) {
+      i_tv[i]=cpos[i];
+      i_nv[i]=cnm[i];
+    }
+    if(read_names) {
+      const vector<string>& ctags=tagnames[ci];
+      SEXP sv=allocVector(STRSXP,ntags);
+      SET_VECTOR_ELT(dv, 2, sv);
+      for(int i=0;i<ntags && i<(int)ctags.size();i++) {
+        SET_STRING_ELT(sv,i,mkChar(ctags[i].c_str()));
+      }
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+    SET_VECTOR_ELT(ans, ci, dv);
+    // dv is now reachable from ans; releasing here keeps the protection stack
+    // depth constant no matter how many chromosomes the file contains
+    UNPROTECT(2);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2);
+  return(ans);
+}
+
+
+
+
+
+  // Read a tagAlign file and return the tags grouped by chromosome.
+  //
+  // Arguments (R objects):
+  //   filename - character; its first element is the path of the file to read
+  //
+  // Fields used (0-based, separated by blanks/tabs, empty fields dropped): 0 chromosome,
+  // 1 start, 2 end, 4 score, 5 strand.  Lines with fewer than six fields are skipped.
+  //
+  // Returns a list named by chromosome.  Every element is a list with
+  //   t - integer tag positions: start when the strand field begins with '+',
+  //       -end otherwise
+  //   n - integer value of field 4 (stored in the slot the other readers use for the
+  //       mismatch count)
+  // If the file cannot be opened a message is printed and an empty list is returned.
+  SEXP read_tagalign(SEXP filename) {
+
+#ifdef DEBUG
+  Rprintf("start\n");
+#endif
+  const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // per-chromosome data, indexed by chromosome index
+  vector< vector<int> > pos;    // signed tag positions
+  vector< vector<int> > posnm;  // value of field 4
+
+  // chromosome name -> index, and index -> name
+  typedef hash_map<string, int, hash<string>,equal_to<string> > chrMapType;
+  chrMapType cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep(" \t");
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { Rprintf("can't open input file \"%s\"\n",fname); }
+  else {
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int fcount=0;
+    while(get_a_line(f,line)) {
+
+#ifdef DEBUG
+      Rprintf("line: %s\n",line.c_str());
+#endif
+
+      // materialise the fields so that truncated lines are detected instead of
+      // dereferencing a past-the-end tokenizer iterator
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.empty()) { continue; }
+      if(fields.size()<6) {
+        Rprintf("ERROR: skipping line with %d fields (expected at least 6): \"%s\"\n",(int)fields.size(),line.c_str());
+        continue;
+      }
+      const string& chr=fields[0];
+      const string& str_spos=fields[1];
+      const string& str_epos=fields[2];
+      // field 3 is not used
+      const string& str_qual=fields[4];
+      const string& str_strand=fields[5];
+
+      int fpos;
+      if(str_strand[0]=='+') {
+        fpos=atoi(str_spos.c_str());
+      } else {
+        fpos=-1*atoi(str_epos.c_str());
+      }
+      int nm=atoi(str_qual.c_str());
+
+      // determine the chromosome index
+      chrMapType::const_iterator li=cind_map.find(chr);
+      int cind=-1;
+      if(li==cind_map.end()) {
+        // register new chromosome
+        cind=(int)cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        // allocate new pos vector
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+#ifdef DEBUG
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+#ifdef DEBUG
+      Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d\n",chr.c_str(),cind,fpos,nm);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  int nchr=(int)cnames.size();
+  SEXP chnames,ans;
+  PROTECT(chnames = allocVector(STRSXP, nchr));
+  for(int ci=0;ci<nchr;ci++) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+  PROTECT(ans = allocVector(VECSXP, nchr));
+
+  for(int ci=0;ci<nchr;ci++) {
+    const vector<int>& cpos=pos[ci];
+    const vector<int>& cnm=posnm[ci];
+    int ntags=(int)cpos.size();
+
+    // dv is protected first; every component is stored into it right after
+    // allocation, so it needs no protection of its own
+    SEXP dv,dnames_R;
+    PROTECT(dv = allocVector(VECSXP, 2));
+    PROTECT(dnames_R = allocVector(STRSXP, 2));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+
+    SEXP tv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 0, tv);
+    SEXP nv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 1, nv);
+    int* i_tv=INTEGER(tv);
+    int* i_nv=INTEGER(nv);
+    for(int i=0;i<ntags;i++) {
+      i_tv[i]=cpos[i];
+      i_nv[i]=cnm[i];
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+    SET_VECTOR_ELT(ans, ci, dv);
+    // dv is now reachable from ans; releasing here keeps the protection stack
+    // depth constant no matter how many chromosomes the file contains
+    UNPROTECT(2);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2);
+  return(ans);
+}
+
+
+
+
+  // Read the short arachne alignment format (regular or bz2) and return the tags
+  // grouped by chromosome.
+  //
+  // Arguments (R objects):
+  //   filename - character; its first element is the path of the file to read.
+  //              When built with HAVE_LIBBZ2 and the name contains ".bz2" the file is
+  //              read through libbzip2.
+  //
+  // Fields used (0-based, separated by blanks/tabs, empty fields dropped): 0 chromosome,
+  // 1 position, and optionally 2 number of mismatches (0 when absent).
+  // Lines with fewer than two fields are skipped.
+  //
+  // Returns a list named by chromosome.  Every element is a list with
+  //   t - integer tag positions, taken from field 1 unchanged
+  //       (NOTE(review): presumably the sign already encodes the strand - confirm)
+  //   n - integer number of mismatches
+  // If the file cannot be opened a message is printed and an empty list is returned.
+  SEXP read_arachne(SEXP filename) {
+
+#ifdef DEBUG
+  Rprintf("start\n");
+#endif
+  const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG
+  Rprintf("fname=%s\n",fname);
+#endif
+
+  // per-chromosome data, indexed by chromosome index
+  vector< vector<int> > pos;    // tag positions
+  vector< vector<int> > posnm;  // number of mismatches
+
+  // chromosome name -> index, and index -> name
+  typedef hash_map<string, int, hash<string>,equal_to<string> > chrMapType;
+  chrMapType cind_map;
+  vector<string> cnames;
+
+  typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+  boost::char_separator<char> sep(" \t");
+
+  FILE *f=fopen(fname,"rb");
+  if (!f)  { Rprintf("can't open input file \"%s\"\n",fname); }
+  else {
+    bool readable=true;
+#ifdef HAVE_LIBBZ2
+    // b stays NULL for plain files so that it is never closed uninitialised
+    BZFILE* b=NULL;
+    int bzerror=BZ_OK;
+
+    int bz2file=0;
+    if(strstr(fname,".bz2")) {
+      bz2file=1;
+      b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+      if (bzerror != BZ_OK)  {
+        Rprintf("bzerror=%d\n",bzerror);
+        readable=false; // do not try to read through a handle that failed to open
+      }
+    }
+#endif
+
+    Rprintf("opened %s\n",fname);
+
+    string line;
+    int fcount=0;
+#ifdef HAVE_LIBBZ2
+    while(readable && get_a_line(f,b,bz2file,line)) {
+#else
+    while(readable && get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG
+      Rprintf("line: %s\n",line.c_str());
+#endif
+
+      // materialise the fields so that truncated lines are detected instead of
+      // dereferencing a past-the-end tokenizer iterator
+      tokType tok(line, sep);
+      vector<string> fields(tok.begin(),tok.end());
+      if(fields.empty()) { continue; }
+      if(fields.size()<2) {
+        Rprintf("ERROR: skipping line with %d fields (expected at least 2): \"%s\"\n",(int)fields.size(),line.c_str());
+        continue;
+      }
+      const string& chr=fields[0];
+      int fpos=atoi(fields[1].c_str());
+      // the mismatch column is optional
+      int nm=0;
+      if(fields.size()>2) {
+        nm=atoi(fields[2].c_str());
+      }
+
+      // determine the chromosome index
+      chrMapType::const_iterator li=cind_map.find(chr);
+      int cind=-1;
+      if(li==cind_map.end()) {
+        // register new chromosome
+        cind=(int)cnames.size();
+        cnames.push_back(chr);
+        cind_map[chr]=cind;
+        // allocate new pos vector
+        pos.push_back(vector<int>());
+        posnm.push_back(vector<int>());
+#ifdef DEBUG
+        Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int)pos.size());
+#endif
+      } else {
+        cind=li->second;
+      }
+      fcount++;
+      (pos[cind]).push_back(fpos);
+      (posnm[cind]).push_back(nm);
+#ifdef DEBUG
+      Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d\n",chr.c_str(),cind,fpos,nm);
+      if(fcount>30) {
+        break;
+      }
+#endif
+    }
+#ifdef HAVE_LIBBZ2
+    if(b) { BZ2_bzReadClose( &bzerror, b); }
+#endif
+
+    fclose(f);
+
+    Rprintf("done. read %d fragments\n",fcount);
+  }
+
+  // construct output structures
+  int nchr=(int)cnames.size();
+  SEXP chnames,ans;
+  PROTECT(chnames = allocVector(STRSXP, nchr));
+  for(int ci=0;ci<nchr;ci++) {
+    SET_STRING_ELT(chnames, ci, mkChar(cnames[ci].c_str()));
+  }
+  PROTECT(ans = allocVector(VECSXP, nchr));
+
+  for(int ci=0;ci<nchr;ci++) {
+    const vector<int>& cpos=pos[ci];
+    const vector<int>& cnm=posnm[ci];
+    int ntags=(int)cpos.size();
+
+    // dv is protected first; every component is stored into it right after
+    // allocation, so it needs no protection of its own
+    SEXP dv,dnames_R;
+    PROTECT(dv = allocVector(VECSXP, 2));
+    PROTECT(dnames_R = allocVector(STRSXP, 2));
+    SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+    SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+
+    SEXP tv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 0, tv);
+    SEXP nv=allocVector(INTSXP,ntags);
+    SET_VECTOR_ELT(dv, 1, nv);
+    int* i_tv=INTEGER(tv);
+    int* i_nv=INTEGER(nv);
+    for(int i=0;i<ntags;i++) {
+      i_tv[i]=cpos[i];
+      i_nv[i]=cnm[i];
+    }
+    setAttrib(dv, R_NamesSymbol, dnames_R);
+    SET_VECTOR_ELT(ans, ci, dv);
+    // dv is now reachable from ans; releasing here keeps the protection stack
+    // depth constant no matter how many chromosomes the file contains
+    UNPROTECT(2);
+  }
+
+  setAttrib(ans,R_NamesSymbol,chnames);
+
+  UNPROTECT(2);
+  return(ans);
+}
+
+
+  // Reads an Arachne alignment listing and collects, per chromosome, the
+  // signed tag position, the number of mismatches and the match length.
+  //
+  // filename - R character vector; its first element is the input path.
+  //            When compiled with HAVE_LIBBZ2, a path containing ".bz2" is
+  //            read through a bzip2 stream.
+  //
+  // Only lines whose first whitespace-separated field is "QUERY" are used.
+  // Field layout (0-based): 5 = strand flag, 6 = chromosome, 7 = start,
+  // 8 = end, 10 = number of blocks, followed by one (gap, length, mismatches)
+  // triple per block. A strand flag starting with '1' stores the tag as the
+  // negated end position; otherwise the start position is stored.
+  // Lines that are too short to hold these fields are skipped.
+  //
+  // Returns a list named by chromosome; each element is list(t, n, l) of
+  // integer vectors (positions, mismatch counts, match lengths). If the file
+  // cannot be opened a message is printed and an empty list is returned.
+  SEXP read_arachne_long(SEXP filename) {
+
+#ifdef DEBUG  
+    Rprintf("start\n");
+#endif
+    const char* fname=CHAR(asChar(filename));
+#ifdef DEBUG  
+    Rprintf("fname=%s\n",fname);
+#endif
+
+    // main data vectors, indexed by chromosome index
+    vector< vector<int> > pos;    // signed tag positions
+    vector< vector<int> > posnm;  // number of mismatches
+    vector< vector<int> > poslen; // length of the match
+
+    // chromosome name -> index map, and the names in order of first appearance
+    hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+    vector<string> cnames;
+
+    typedef boost::tokenizer<boost::char_separator<char> >  tokType;
+    boost::char_separator<char> sep(" \t");
+
+    FILE *f=fopen(fname,"rb");
+    if (!f)  { cout<<"can't open input file \""<<fname<<"\"\n"; }
+    else {
+
+#ifdef HAVE_LIBBZ2
+      // b stays NULL for plain files so that it is never closed uninitialized
+      BZFILE* b=NULL;
+      int bzerror=BZ_OK;
+
+      int bz2file=0;
+      if(strstr(fname,".bz2")) {
+        bz2file=1;
+        b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0);
+        if (bzerror != BZ_OK)  { cout<<"bzerror="<<bzerror<<endl; }
+      }
+#endif
+
+      Rprintf("opened %s\n",fname);
+
+      // read in the file line by line
+      string line;
+      int fcount=0;
+#ifdef HAVE_LIBBZ2
+      while(get_a_line(f,b,bz2file,line)) {
+#else
+      while(get_a_line(f,line)) {
+#endif
+
+#ifdef DEBUG  
+        Rprintf("line: %s\n",line.c_str());
+#endif
+
+        // materialize the tokens so that every field access can be bounds-checked
+        tokType tok(line, sep);
+        vector<string> fields(tok.begin(), tok.end());
+        if(fields.empty() || fields[0]!="QUERY") { continue; }
+        if(fields.size()<11) { continue; } // truncated record
+
+        const string& str_strand=fields[5];
+        const string& chr=fields[6];
+        const string& str_startpos=fields[7];
+        const string& str_endpos=fields[8];
+
+        int fpos;
+        if(str_strand[0]=='1') {
+          fpos=-1*atoi(str_endpos.c_str());
+        } else {
+          fpos=atoi(str_startpos.c_str());
+        }
+#ifdef DEBUG  
+        Rprintf("chr=%s, fpos=%d\n",chr.c_str(),fpos);
+#endif
+        int nblocks=atoi(fields[10].c_str());
+#ifdef DEBUG  
+        Rprintf("nblocks=%d\n",nblocks);
+#endif
+        // every block needs three more fields; skip records that lack them
+        if(nblocks>0 && (fields.size()-11)/3 < (size_t) nblocks) { continue; }
+
+        // tally up the read length and the number of mismatches for all blocks
+        int len=0; int nm=0;
+        for(int i=0;i<nblocks;i++) {
+          int sgs=atoi(fields[11+3*i].c_str());
+          int slen=atoi(fields[12+3*i].c_str());
+          int snm=atoi(fields[13+3*i].c_str());
+#ifdef DEBUG  
+          Rprintf("sgs=%d, slen=%d, snm=%d\n",sgs,slen,snm);
+#endif
+          len+=slen;
+          nm+=abs(sgs)+snm;
+        }
+        // each block boundary is counted as one additional mismatch
+        nm+=nblocks-1;
+
+        // determine the chromosome index
+        hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+        int cind=-1;
+        if(li==cind_map.end()) {
+          // register new chromosome
+          cind=cnames.size();
+          cnames.push_back(chr);
+          cind_map[chr]=cind;
+          // allocate new per-chromosome vectors
+          pos.push_back(vector<int>());
+          posnm.push_back(vector<int>());
+          poslen.push_back(vector<int>());
+#ifdef DEBUG  
+          Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int) pos.size());
+#endif
+        } else {
+          cind=li->second;
+        }
+        fcount++;
+        (pos[cind]).push_back(fpos);
+        (posnm[cind]).push_back(nm);
+        (poslen[cind]).push_back(len);
+#ifdef DEBUG  
+        Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d len=%d\n",chr.c_str(),cind,fpos,nm,len);
+        if(fcount>30) {
+          break;
+        }
+#endif
+      }
+#ifdef HAVE_LIBBZ2
+      // only close the bzip2 handle if one was actually opened
+      if(bz2file && b!=NULL) { BZ2_bzReadClose( &bzerror, b); }
+#endif
+
+      fclose(f);
+
+      Rprintf("done. read %d fragments\n",fcount);
+    }
+
+    // construct output structures
+    SEXP chnames;
+    int np=0; // number of protections held until return
+    PROTECT(chnames = allocVector(STRSXP, cnames.size())); np++;
+    for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+      SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+    }
+
+    SEXP ans;
+    PROTECT(ans = allocVector(VECSXP, cnames.size()));   np++;
+    for(size_t ci=0; ci<pos.size(); ci++) {
+      const vector<int>& cpos=pos[ci];
+      const vector<int>& cnm=posnm[ci];
+      const vector<int>& clen=poslen[ci];
+
+      SEXP dv,dnames_R;
+      PROTECT(dnames_R = allocVector(STRSXP, 3));
+      SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+      SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+      SET_STRING_ELT(dnames_R, 2, mkChar("l"));
+
+      SEXP tv,nv,lv;
+      PROTECT(tv=allocVector(INTSXP,cpos.size()));
+      PROTECT(nv=allocVector(INTSXP,cpos.size()));
+      PROTECT(lv=allocVector(INTSXP,cpos.size()));
+      int* i_tv=INTEGER(tv);
+      int* i_nv=INTEGER(nv);
+      int* i_lv=INTEGER(lv);
+
+      for(size_t i=0;i<cpos.size();i++) {
+        i_tv[i]=cpos[i];
+        i_nv[i]=cnm[i];
+        i_lv[i]=clen[i];
+      }
+      PROTECT(dv = allocVector(VECSXP, 3));
+      SET_VECTOR_ELT(dv, 0, tv);
+      SET_VECTOR_ELT(dv, 1, nv);
+      SET_VECTOR_ELT(dv, 2, lv);
+      setAttrib(dv, R_NamesSymbol, dnames_R);
+
+      SET_VECTOR_ELT(ans, ci, dv);
+      // dv and everything it references is now reachable from the protected
+      // ans, so release the five per-chromosome protections right away
+      // instead of letting the protection stack grow with the chromosome count
+      UNPROTECT(5);
+    }
+
+    setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG  
+    Rprintf("unprotecting %d elements\n",np);
+#endif
+
+    UNPROTECT(np);
+    return(ans);
+  }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/cdensum.c	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,144 @@
+#include <math.h>
+#include "R.h"
+#include "Rmath.h"
+#include "Rinternals.h"
+
+
+#undef DEBUG 1
+
+// Accumulates a Gaussian-weighted tag density into dout.
+// n    - number of positions in pos (and length of the tc count array)
+// pos  - tag positions
+// tc   - tag count at each position (truncated to an integer)
+// spos - starting position; output index j corresponds to offset j*step
+// bw   - kernel bandwidth; dw - window half-size in units of bw*count
+// npos - length of the dout output array
+// step - spacing between output points
+// dout - output array; contributions are added to its current contents
+void cdensum(int *n, double *pos, double *tc, double *spos, int *bw,int *dw, int *npos, int *step,double *dout)
+{
+  const double bandwidth=(double) *bw;
+  int t;
+
+  for(t=0; t< *n; t++) {
+    int k;
+    // offset of this tag from the start and its (integer) count
+    int offset=(int) (pos[t]- *spos);
+    int count=tc[t];
+    // half-size of the window receiving contributions from this tag
+    int half=(*dw)*(*bw)*count;
+    // note: both quotients are integer divisions before floor/ceil are applied
+    int first=(int) floor((offset-half)/(*step));
+    int last=(int) ceil((offset+half)/(*step));
+    if(first<0) { first=0; }
+    if(last>= *npos) { last= *npos -1; }
+
+    for(k=first;k<last;k++) {
+      double z=((double)(k*(*step)-offset))/bandwidth;
+      dout[k]+=((double)count)*exp(-0.5*z*z);
+    }
+  }
+}
+
+
+// Counts tags in sliding windows.
+// n           - number of positions in the sorted tag array (positive only)
+// pos         - sorted tag positions
+// spos        - centre of the first window
+// window_size - window width; the first window starts at spos-window_size/2
+//               (integer division)
+// window_step - distance the window moves between consecutive outputs
+// npos        - number of windows, i.e. length of dout
+// dout        - output array receiving the tag count of each window; a tag is
+//               counted while left_edge <= pos <= left_edge+window_size
+void window_n_tags(int *n, double *pos, double *spos, int *window_size, int *window_step, int *npos, int *dout)
+{
+  int lo=0;      // index of the first tag not yet dropped from the window
+  int hi=0;      // index of the first tag not yet added to the window
+  int inside=0;  // number of tags currently inside the window
+  double left=*spos-(*window_size)/2; // left-edge position
+  int w;
+
+  for(w=0; w<*npos; w++, left+=*window_step) {
+    const double right=left+(*window_size);
+    // pull in tags up to and including the right edge
+    for(; hi<(*n) && pos[hi]<=right; hi++) { inside++; }
+    // drop tags that fell behind the left edge
+    for(; lo<*n && pos[lo]<left; lo++) { inside--; }
+    dout[w]=inside;
+  }
+}
+
+// Counts tags in sliding windows (.Call interface).
+// pos_R         - sorted tag positions (doubles; positive, pre-shifted)
+// spos_R        - centre of the first window (double)
+// window_size_R - window width (integer)
+// window_step_R - distance between consecutive windows (integer)
+// nsteps_R      - number of windows (integer)
+// Returns an integer vector of length nsteps holding the tag count of each
+// window; a tag is counted while left_edge <= pos <= left_edge+window_size.
+SEXP cwindow_n_tags(SEXP pos_R, SEXP spos_R, SEXP window_size_R, SEXP window_step_R, SEXP nsteps_R) {
+  double* tags=REAL(pos_R);
+  int ntags=LENGTH(pos_R);
+  int width=*INTEGER(window_size_R);
+  int stride=*INTEGER(window_step_R);
+  int nwin=*INTEGER(nsteps_R);
+  double start=*REAL(spos_R);
+
+  // allocate return array
+  SEXP counts_R;
+  int* counts;
+  int lo=0;      // index of the first tag not yet dropped from the window
+  int hi=0;      // index of the first tag not yet added to the window
+  int inside=0;  // number of tags currently inside the window
+  double left=start-width/2; // left-edge position (integer half-width)
+  int w;
+
+  PROTECT(counts_R=allocVector(INTSXP,nwin));
+  counts=INTEGER(counts_R);
+
+  for(w=0; w<nwin; w++, left+=stride) {
+    const double right=left+width;
+    // pull in tags up to and including the right edge
+    for(; hi<ntags && tags[hi]<=right; hi++) { inside++; }
+    // drop tags that fell behind the left edge
+    for(; lo<ntags && tags[lo]<left; lo++) { inside--; }
+    counts[w]=inside;
+  }
+  UNPROTECT(1);
+  return(counts_R);
+}
+
+// Tag counts in windows centred on the given positions (.Call interface).
+// pos_R              - tag positions (doubles); the sweep assumes ascending order
+// ntags_R            - number of tags at each position (integers)
+// wpos_R             - window centres (doubles); the sweep assumes ascending order
+// window_half_size_R - half-width of each window (integer)
+// Returns an integer vector with one entry per window centre: the summed tag
+// count of positions with centre-half <= pos <= centre+half.
+SEXP cwindow_n_tags_around(SEXP pos_R, SEXP ntags_R, SEXP wpos_R, SEXP window_half_size_R) {
+  double* tags=REAL(pos_R);
+  int* weight=INTEGER(ntags_R);
+  int ntags=LENGTH(pos_R);
+  double* centre=REAL(wpos_R);
+  int nwin=LENGTH(wpos_R); // number of windows
+  double half=(double) *INTEGER(window_half_size_R);
+
+  // allocate return array
+  SEXP counts_R;
+  int* counts;
+  int lo=0;      // index of the first position not yet dropped
+  int hi=0;      // index of the first position not yet added
+  int inside=0;  // summed tag count currently inside the window
+  int w;
+
+  PROTECT(counts_R=allocVector(INTSXP,nwin));
+  counts=INTEGER(counts_R);
+
+  for(w=0; w<nwin; w++) {
+    const double right=centre[w]+half;
+    const double left=centre[w]-half;
+    // pull in positions up to and including the right edge
+    for(; hi<ntags && tags[hi]<=right; hi++) { inside+=weight[hi]; }
+    // drop positions that fell behind the left edge
+    for(; lo<ntags && tags[lo]<left; lo++) { inside-=weight[lo]; }
+    counts[w]=inside;
+  }
+  UNPROTECT(1);
+  return(counts_R);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/const.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,18 @@
+/* Shared integer typedefs and lookup-table declarations. */
+#ifndef NST_CONST_H
+#define NST_CONST_H
+
+/* all 64 bits set */
+#define MAX_ULL 0xffffffffffffffffull
+
+/* Unsigned integer aliases; the names indicate the intended bit widths.
+   NOTE(review): the actual widths depend on the platform ABI - confirm. */
+typedef unsigned long long bit64_t;
+typedef unsigned bit32_t;
+typedef unsigned short bit16_t;
+typedef unsigned char bit8_t;
+
+/* Lookup tables defined in another translation unit (not visible here).
+   NOTE(review): the names suggest nucleotide code conversion tables
+   (nt4 / nt16 encodings) - confirm against their definitions. */
+extern bit8_t nst_nt4_table[];
+extern bit8_t nst_nt16_table[];
+extern char *nst_nt4_rev_table;
+extern char *nst_nt16_rev_table;
+extern bit8_t nst_nt16_nt4_table[];
+extern int nst_nt16_count_table[];
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/maqmap.c	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include "const.h"
+#include "maqmap.h"
+
+/* Allocates a zero-initialized map header with the format field set to
+   MAQMAP_FORMAT_NEW. Returns NULL if the allocation fails; release the
+   result with maq_delete_maqmap(). */
+maqmap_t *maq_new_maqmap()
+{
+	maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t));
+	/* do not write through a failed allocation */
+	if (mm == NULL) return NULL;
+	mm->format = MAQMAP_FORMAT_NEW;
+	return mm;
+}
+/* Frees a map header together with its reference names and its
+   mapped_reads array. Passing a null pointer is a no-op. */
+void maq_delete_maqmap(maqmap_t *mm)
+{
+	if (mm != 0) {
+		int k = 0;
+		/* release every reference name before the table that holds them */
+		while (k < mm->n_ref) {
+			free(mm->ref_name[k]);
+			++k;
+		}
+		free(mm->ref_name);
+		free(mm->mapped_reads);
+		free(mm);
+	}
+}
+/* Writes the map header to fp: format, number of references, then for each
+   reference its name length (including the terminating NUL) followed by the
+   name bytes, and finally the number of mapped reads. Write errors are not
+   reported. */
+void maqmap_write_header(gzFile fp, const maqmap_t *mm)
+{
+	int k;
+	gzwrite(fp, &mm->format, sizeof(int));
+	gzwrite(fp, &mm->n_ref, sizeof(int));
+	k = 0;
+	while (k != mm->n_ref) {
+		char *name = mm->ref_name[k];
+		/* stored length counts the terminating NUL */
+		int stored = strlen(name) + 1;
+		gzwrite(fp, &stored, sizeof(int));
+		gzwrite(fp, name, stored);
+		++k;
+	}
+	gzwrite(fp, &mm->n_mapped_reads, sizeof(bit64_t));
+}
+/* Reads a map header from fp and returns it in a newly allocated maqmap_t
+   (release with maq_delete_maqmap()).
+
+   Layout read: format, number of references, then per reference a length
+   followed by that many name bytes, and finally the number of mapped reads.
+
+   A positive format value is reported as an obsolete map and terminates the
+   process with exit(3); any other value different from MAQMAP_FORMAT_NEW
+   trips the assert. If the format field cannot be read at all (for example
+   fp is NULL) the preset MAQMAP_FORMAT_NEW is kept and an empty header is
+   returned.
+
+   Truncated or malformed input never yields dangling entries: n_ref is
+   reduced to the number of reference names that were read completely, every
+   name is forced to be NUL-terminated, and n_mapped_reads is 0 when it could
+   not be read. Returns NULL only if the header itself cannot be allocated. */
+maqmap_t *maqmap_read_header(gzFile fp)
+{
+	maqmap_t *mm;
+	int k, len;
+	mm = maq_new_maqmap();
+	if (mm == NULL) return NULL;
+	gzread(fp, &mm->format, sizeof(int));
+	if (mm->format != MAQMAP_FORMAT_NEW) {
+		if (mm->format > 0) {
+			fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n");
+			exit(3);
+		}
+		assert(mm->format == MAQMAP_FORMAT_NEW);
+	}
+	/* a short read or a negative count means there are no usable references */
+	if (gzread(fp, &mm->n_ref, sizeof(int)) != (int)sizeof(int) || mm->n_ref < 0)
+		mm->n_ref = 0;
+	mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*));
+	if (mm->ref_name == NULL) mm->n_ref = 0;
+	for (k = 0; k != mm->n_ref; ++k) {
+		if (gzread(fp, &len, sizeof(int)) != (int)sizeof(int) || len <= 0) break;
+		mm->ref_name[k] = (char*)malloc(len * sizeof(char));
+		if (mm->ref_name[k] == NULL) break;
+		if (gzread(fp, mm->ref_name[k], len) != len) {
+			free(mm->ref_name[k]);
+			mm->ref_name[k] = NULL;
+			break;
+		}
+		/* the stored length includes the NUL; enforce it for corrupt input */
+		mm->ref_name[k][len - 1] = '\0';
+	}
+	/* keep only the names that were read completely */
+	mm->n_ref = k;
+	/* read number of mapped reads */
+	if (gzread(fp, &mm->n_mapped_reads, sizeof(bit64_t)) != (int)sizeof(bit64_t))
+		mm->n_mapped_reads = 0;
+	return mm;
+}
+
+/* mapvalidate */
+
+/* Reads the header and every record from fpin, printing the number of
+   reference sequences, any inconsistency found (truncated record, reference
+   index out of range, read size out of range, header read count that
+   disagrees with the records seen) and finally the per-reference record
+   counts. Scanning stops at the first fatal inconsistency. */
+static void mapvalidate_core(gzFile fpin)
+{
+	maqmap_t *header = maqmap_read_header(fpin);
+	maqmap1_t record;
+	bit64_t n_records = 0;
+	bit64_t *per_ref = (bit64_t*)calloc(header->n_ref, 8);
+	int got, r;
+	printf("[message] number of reference sequences: %d\n", header->n_ref);
+	for (;;) {
+		got = maqmap_read1(fpin, &record);
+		if (got == 0) break;
+		/* anything but a full record (including a read error) is a truncation */
+		if (got != sizeof(maqmap1_t)) {
+			printf("[fatal error] truncated map file.\n");
+			break;
+		}
+		++n_records;
+		if ((int)record.seqid >= header->n_ref) {
+			printf("[fatal error] maqmap1_t::seqid is invalid (%d >= %d).\n", record.seqid, header->n_ref);
+			break;
+		}
+		++per_ref[record.seqid];
+		if (record.size >= MAX_READLEN - 1) {
+			printf("[faltal error] maqmap1_t::size is invalid (%d >= %d).\n", record.size, MAX_READLEN - 1);
+			break;
+		}
+	}
+	/* a zero header count means "not set" and is not compared */
+	if (header->n_mapped_reads != 0 && header->n_mapped_reads != n_records) {
+		printf("[warning] maqmap1_t::n_mapped_reads is set, but not equals the real number (%llu != %llu).\n",
+				header->n_mapped_reads, n_records);
+	}
+	for (r = 0; r != header->n_ref; ++r)
+		printf("[message] %s : %llu\n", header->ref_name[r], per_ref[r]);
+	free(per_ref);
+	maq_delete_maqmap(header);
+}
+
+/* mapview */
+
+/* Prints every record of the map read from fpin to fpout, one tab-separated
+   line per record: read name, reference name, 1-based position, strand,
+   mate offset, pair flag, mapping quality, single-end quality, alternative
+   quality, mismatch count (low 4 bits of info1), info2, c[0], c[1] and read
+   size.
+   is_verbose - also print the decoded sequence and the quality string
+   is_mm      - also print the 64-bit word stored at seq+55 in hex */
+static void mapview_core(FILE *fpout, gzFile fpin, int is_verbose, int is_mm)
+{
+	maqmap_t *header = maqmap_read_header(fpin);
+	maqmap1_t record;
+	maqmap1_t *rec = &record;
+	bit32_t b;
+	for (;;) {
+		if (!maqmap_read1(fpin, rec)) break;
+		fprintf(fpout, "%s\t%s\t%d\t%c\t%d\t%u\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d",
+				rec->name, header->ref_name[rec->seqid], (rec->pos>>1) + 1,
+				(rec->pos&1)? '-' : '+', rec->dist, rec->flag, rec->map_qual, (signed char)rec->seq[MAX_READLEN-1],
+				rec->alt_qual, rec->info1&0xf, rec->info2, rec->c[0], rec->c[1], rec->size);
+		if (is_verbose) {
+			fputc('\t', fpout);
+			/* top two bits select the base, low six bits hold the quality;
+			   qualities below 27 are printed in lower case, a zero byte as 'n' */
+			b = 0;
+			while (b != rec->size) {
+				bit8_t code = rec->seq[b];
+				const char *alphabet = ((code&0x3f) < 27)? "acgt" : "ACGT";
+				fputc((code == 0)? 'n' : alphabet[code>>6&3], fpout);
+				++b;
+			}
+			fputc('\t', fpout);
+			/* qualities as printable characters with an offset of 33 */
+			b = 0;
+			while (b != rec->size) {
+				fputc((rec->seq[b]&0x3f) + 33, fpout);
+				++b;
+			}
+		}
+		if (is_mm) {
+			bit64_t *word = (bit64_t*)(rec->seq + 55);
+			fprintf(fpout, "\t%llx", *word);
+		}
+		fputc('\n', fpout);
+	}
+	maq_delete_maqmap(header);
+}
+
+/* Command-line entry point: maq mapview [-bN] <in.map>
+   -b  suppress the sequence and quality columns
+   -N  also print the 64-bit word stored at seq+55
+   An input name of "-" reads from standard input.
+   Returns 0 on success and 1 on a usage error or when the input cannot be
+   opened. */
+int ma_mapview(int argc, char *argv[])
+{
+	int c, is_verbose = 1, is_mm = 0;
+	gzFile fp;
+	while ((c = getopt(argc, argv, "bN")) >= 0) {
+		switch (c) {
+		case 'b': is_verbose = 0; break;
+		case 'N': is_mm = 1; break;
+		}
+	}
+	if (argc == optind) {
+		fprintf(stderr, "Usage: maq mapview [-bN] <in.map>\n");
+		return 1;
+	}
+	fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[optind], "r");
+	/* report an unreadable input instead of silently printing nothing */
+	if (fp == NULL) {
+		fprintf(stderr, "[ma_mapview] fail to open '%s'\n", argv[optind]);
+		return 1;
+	}
+	mapview_core(stdout, fp, is_verbose, is_mm);
+	gzclose(fp);
+	return 0;
+}
+
+/* Command-line entry point: maq mapvalidate <in.map>
+   An input name of "-" reads from standard input.
+   Returns 0 on success and 1 on a usage error or when the input cannot be
+   opened. */
+int ma_mapvalidate(int argc, char *argv[])
+{
+	gzFile fp;
+	if (argc < 2) {
+		fprintf(stderr, "Usage: maq mapvalidate <in.map>\n");
+		return 1;
+	}
+	/* no options are parsed here, so the input is always argv[1]; the global
+	   optind may have been moved by an earlier getopt() call and must not be
+	   used to pick the argument */
+	fp = (strcmp(argv[1], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[1], "r");
+	if (fp == NULL) {
+		fprintf(stderr, "[ma_mapvalidate] fail to open '%s'\n", argv[1]);
+		return 1;
+	}
+	mapvalidate_core(fp);
+	gzclose(fp);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/maqmap.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,70 @@
+/* Record layout and header I/O declarations for the binary map format. */
+#ifndef MAQMAP_H_
+#define MAQMAP_H_
+
+/* Size of the per-read seq buffer; doubled when MAQ_LONGREADS is defined.
+   This changes sizeof(maqmap1_t) and therefore the on-disk record size. */
+#ifdef MAQ_LONGREADS
+#  define MAX_READLEN 128
+#else
+#  define MAX_READLEN 64
+#endif
+
+/* size of the fixed read-name buffer in maqmap1_t */
+#define MAX_NAMELEN 36
+/* values of maqmap_t::format */
+#define MAQMAP_FORMAT_OLD 0
+#define MAQMAP_FORMAT_NEW -1
+
+/* Single-bit values for maqmap1_t::flag.
+   NOTE(review): the names suggest mate orientation (FF/FR/RF/RR) and pair
+   status - confirm against the producer of the map files. */
+#define PAIRFLAG_FF      0x01
+#define PAIRFLAG_FR      0x02
+#define PAIRFLAG_RF      0x04
+#define PAIRFLAG_RR      0x08
+#define PAIRFLAG_PAIRED  0x10
+#define PAIRFLAG_DIFFCHR 0x20
+#define PAIRFLAG_NOMATCH 0x40
+#define PAIRFLAG_SW      0x80
+
+#include <string.h>
+#include <zlib.h>
+#include "const.h"
+
+/*
+  name: read name
+  size: the length of the read
+  seq: read sequence (see also below)
+  seq[MAX_READLEN-1]: single end mapping quality (equals to map_qual if not paired)
+  map_qual: the final mapping quality
+  alt_qual: the lower quality of the two ends (equals to map_qual if not paired)
+  flag: status of the pair
+  dist: offset of the mate (zero if not paired)
+  info1: mismatches in the 24bp (higher 4 bits) and mismatches (lower 4 bits)
+  info2: sum of errors of the best hit
+  c[2]: count of all 0- and 1-mismatch hits on the reference
+ */
+/* One mapped read; records are read from the file as raw bytes of this struct. */
+typedef struct
+{
+	bit8_t seq[MAX_READLEN]; /* the last base is the single-end mapping quality. */
+	bit8_t size, map_qual, info1, info2, c[2], flag, alt_qual;
+	bit32_t seqid, pos;
+	int dist;
+	char name[MAX_NAMELEN];
+} maqmap1_t;
+
+/* Map header: format tag, the n_ref reference names, the number of mapped
+   reads, and an optional in-memory array of records (freed by
+   maq_delete_maqmap). */
+typedef struct
+{
+	int format, n_ref;
+	char **ref_name;
+	bit64_t n_mapped_reads;
+	maqmap1_t *mapped_reads;
+} maqmap_t;
+
+/* Reads one record into *m1; evaluates to the gzread() result, i.e. the
+   number of bytes read (sizeof(maqmap1_t) for a complete record). */
+#define maqmap_read1(fp, m1) gzread((fp), (m1), sizeof(maqmap1_t))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+	maqmap_t *maq_new_maqmap();
+	void maq_delete_maqmap(maqmap_t *mm);
+	void maqmap_write_header(gzFile fp, const maqmap_t *mm);
+	maqmap_t *maqmap_read_header(gzFile fp);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/maqread.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,207 @@
+#include "pc.h"
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <strstream>
+#include <algorithm>
+#include <string>
+#include <functional>
+#include <utility>
+#include <zlib.h>
+
+extern "C" {
+#include "R.h"
+#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+#include "maqmap.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx; 
+
+
+// Strict weak ordering that compares two integers by magnitude, ignoring
+// their sign (used for ordering signed tag positions).
+class lessAbsoluteValue {
+public:
+  bool operator()(int a, int b) const {
+    const int magnitude_a = (a < 0) ? -a : a;
+    const int magnitude_b = (b < 0) ? -b : b;
+    return magnitude_a < magnitude_b;
+  }
+};
+
+
+
+//#define DEBUG 1
+
+extern "C" {
+
+  // Reads a binary (gzip-compressed) maq map file and collects, per
+  // reference sequence, the signed tag positions and mismatch counts.
+  //
+  // filename         - R character vector; its first element is the path
+  // read_tag_names_R - integer/logical flag; non-zero also returns read names
+  //
+  // A tag on the reverse strand (lowest bit of the stored position set) is
+  // recorded as the negated position of its last base. The mismatch count is
+  // taken from the low four bits of info1.
+  //
+  // Reading stops at the first incomplete record or at a record whose
+  // reference index is outside the header's reference table.
+  //
+  // Returns a list named by reference; each element is list(t, n) or, when
+  // names are requested, list(t, n, s). If the file cannot be opened a
+  // message is printed and an empty list is returned.
+  SEXP read_binmaqmap(SEXP filename,SEXP read_tag_names_R) {
+
+#ifdef DEBUG  
+    Rprintf("start\n");
+#endif
+    const char* fname=CHAR(asChar(filename));
+    // normalized to 0/1 because the flag is also used as an element count
+    int read_names=(*(INTEGER(read_tag_names_R)))!=0 ? 1 : 0;
+#ifdef DEBUG  
+    Rprintf("fname=%s\n",fname);
+#endif
+
+    // main data vectors, indexed by chromosome index
+    vector< vector<int> > pos;
+    vector< vector<int> > posnm; // number of mismatches
+    vector< vector<string> > tagnames;
+
+    // chromosome name -> index map, and the names in order of first appearance
+    hash_map<string, int, hash<string>,equal_to<string> > cind_map;
+    vector<string> cnames;
+
+    gzFile f=gzopen(fname,"r");
+
+    if (!f)  { 
+      cout<<"can't open input file \""<<fname<<"\"\n"; 
+    }  else {
+      // the header is only read once the file is known to be open
+      maqmap_t *m = maqmap_read_header(f);
+      maqmap1_t mm1;
+      maqmap1_t *m1 = &mm1;
+      Rprintf("opened %s\n",fname);
+
+      int fcount=0;
+      // gzread returns -1 on error and a short count on truncation, so only a
+      // complete record keeps the loop going
+      while(m!=NULL && maqmap_read1(f, m1) == (int) sizeof(maqmap1_t)) {
+        // a reference index outside the header table means a corrupt record
+        if(m->n_ref<=0 || m1->seqid >= (bit32_t) m->n_ref) {
+          Rprintf("invalid reference index %u in %s; stopping\n",m1->seqid,fname);
+          break;
+        }
+        // the name buffer is fixed-size and need not be NUL-terminated
+        string tagname(m1->name, find(m1->name, m1->name+MAX_NAMELEN, '\0'));
+        string chr=string(m->ref_name[m1->seqid]);
+        int len=m1->size;
+        int fpos=(m1->pos>>1) + 1;
+        if(m1->pos&1) {
+          fpos=-1*(fpos+len-1);
+        }
+        int nm=m1->info1&0xf;
+
+#ifdef DEBUG  
+        Rprintf("read in map line chr=%s tagname=%s fpos=%d, nm=%d, len=%d\n",chr.c_str(),tagname.c_str(),fpos,nm,len);
+#endif
+
+        // determine the chromosome index
+        hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
+        int cind=-1;
+        if(li==cind_map.end()) {
+          // register new chromosome
+          cind=cnames.size();
+          cnames.push_back(chr);
+          cind_map[chr]=cind;
+          // allocate new per-chromosome vectors
+          pos.push_back(vector<int>());
+          posnm.push_back(vector<int>());
+          if(read_names) {
+            tagnames.push_back(vector<string>());
+          }
+#ifdef DEBUG  
+          Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,(int) pos.size());
+#endif
+        } else {
+          cind=li->second;
+        }
+        fcount++;
+        (pos[cind]).push_back(fpos);
+        (posnm[cind]).push_back(nm);
+        if(read_names) {
+          (tagnames[cind]).push_back(tagname);
+        }
+#ifdef DEBUG  
+        Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
+        if(fcount>30) {
+          break;
+        }
+#endif
+      }
+      // release the header (reference names) now that all records are copied
+      maq_delete_maqmap(m);
+      gzclose(f);
+      Rprintf("done. read %d fragments\n",fcount);
+    }
+
+    // construct output structures
+    SEXP chnames;
+    int np=0; // number of protections held until return
+    PROTECT(chnames = allocVector(STRSXP, cnames.size())); np++;
+    for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
+      SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
+    }
+
+    SEXP ans;
+    PROTECT(ans = allocVector(VECSXP, cnames.size()));   np++;
+    for(size_t ci=0; ci<pos.size(); ci++) {
+      const vector<int>& cpos=pos[ci];
+      const vector<int>& cnm=posnm[ci];
+
+      SEXP dv,dnames_R;
+      PROTECT(dnames_R = allocVector(STRSXP, 2+read_names));
+      SET_STRING_ELT(dnames_R, 0, mkChar("t"));
+      SET_STRING_ELT(dnames_R, 1, mkChar("n"));
+      if(read_names) {
+        SET_STRING_ELT(dnames_R, 2, mkChar("s"));
+      }
+
+      SEXP tv,nv,sv=R_NilValue;
+      PROTECT(tv=allocVector(INTSXP,cpos.size()));
+      PROTECT(nv=allocVector(INTSXP,cpos.size()));
+      if(read_names) {
+        PROTECT(sv=allocVector(STRSXP,cpos.size()));
+      }
+      int* i_tv=INTEGER(tv);
+      int* i_nv=INTEGER(nv);
+
+      for(size_t i=0;i<cpos.size();i++) {
+        i_tv[i]=cpos[i];
+        i_nv[i]=cnm[i];
+      }
+      if(read_names) {
+        const vector<string>& cnm_s=tagnames[ci];
+        for(size_t i=0;i<cnm_s.size();i++) {
+          SET_STRING_ELT(sv,i,mkChar(cnm_s[i].c_str()));
+        }
+      }
+      PROTECT(dv = allocVector(VECSXP, 2+read_names));
+      SET_VECTOR_ELT(dv, 0, tv);
+      SET_VECTOR_ELT(dv, 1, nv);
+      if(read_names) {
+        SET_VECTOR_ELT(dv, 2, sv);
+      }
+      setAttrib(dv, R_NamesSymbol, dnames_R);
+
+      SET_VECTOR_ELT(ans, ci, dv);
+      // dv and everything it references is now reachable from the protected
+      // ans, so release the per-chromosome protections (names, t, n, [s], dv)
+      // instead of letting the protection stack grow with the reference count
+      UNPROTECT(4+read_names);
+    }
+
+    setAttrib(ans,R_NamesSymbol,chnames);
+
+#ifdef DEBUG  
+    Rprintf("unprotecting %d elements\n",np);
+#endif
+
+    UNPROTECT(np);
+    return(ans);
+  }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/pc.h	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,20 @@
+// Pulls in the GNU hash containers and makes std::string usable as a key.
+#ifndef PC_H
+#define PC_H 1
+#include <functional>
+// std::string is used below, so include it here to keep the header self-contained
+#include <string>
+//#include <hash_map.h>
+#include <ext/hash_set>
+#include <ext/hash_map>
+
+
+namespace __gnu_cxx
+{
+        // Hash for std::string keys: delegates to the C-string hash of the
+        // string's contents, so a std::string and the equal const char* hash alike.
+        template<> struct hash< std::string >
+        {
+                size_t operator()( const std::string& x ) const
+                {
+                        return hash< const char* >()( x.c_str() );
+                }
+        };
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/peaks.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,804 @@
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <string>
+#include <set>
+
+extern "C" {
+#include "R.h"
+#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx; 
+
+/**
+ * Calculate all local peaks
+ */
+
+//#define DEBUG 1
+
+extern "C" {
+  // Finds local maxima of x that reach the threshold.
+  //
+  // x_R        - numeric vector of values
+  // thr_R      - numeric threshold; a peak must satisfy x[i] >= thr
+  // max_span_R - integer; when greater than 2, peaks lying within max_span
+  //              positions of the currently retained peak are merged with it,
+  //              keeping the higher one
+  //
+  // A position is a peak when its value exceeds both the next value and the
+  // most recent value that differed from its successor (so the right end of
+  // a plateau can qualify). The first and last elements are never reported.
+  // Returns an integer vector of 1-based peak positions; it is empty for
+  // inputs with fewer than three elements.
+  SEXP find_peaks(SEXP x_R,SEXP thr_R,SEXP max_span_R) {
+
+#ifdef DEBUG  
+    Rprintf("start\n");
+#endif
+    double* x=REAL(x_R);
+    int nx=LENGTH(x_R);
+    int max_span=*INTEGER(max_span_R);
+    double thr=REAL(thr_R)[0];
+#ifdef DEBUG  
+    Rprintf("n=%d; thr=%f; max_span=%d\n",nx,thr,max_span);
+#endif
+
+    vector<int> pos;
+
+    // an empty input has no x[0]; the scan loop below does not run in that case
+    double pv=(nx>0) ? x[0] : 0.0;
+    double ppv=0; // previous peak value
+    int ppp=-max_span-1; // previous peak position
+
+    for(int i=1;i<(nx-1);i++) {
+      if(x[i]>pv && x[i]>=thr && x[i]>x[i+1]) {
+        if(max_span>2) {
+          if(i-ppp > max_span) {
+            // far enough from the retained peak: emit it and start a new one
+            if(ppp>=0) {
+              pos.push_back(ppp);
+            }
+            ppp=i; ppv=x[i];
+          } else {
+            // within the span: keep whichever peak is higher
+            if(x[i]>ppv) {
+              ppp=i; ppv=x[i];
+            }
+          }
+        } else {
+          pos.push_back(i);
+        }
+      }
+      // pv only moves when the value changes, so plateaus compare against
+      // the value that preceded them
+      if(x[i]!=x[i+1]) { pv=x[i]; }
+    }
+
+    // add remaining peak
+    if(max_span>2 && ppp>=0) {
+      pos.push_back(ppp);
+    }
+
+    SEXP nv;
+    PROTECT(nv=allocVector(INTSXP,pos.size())); 
+    int* i_nv=INTEGER(nv);
+    int i=0;
+    for(vector<int> ::const_iterator pi=pos.begin();pi!=pos.end();++pi) {
+      i_nv[i++]=1+(*pi); // convert to R's 1-based indexing
+    }
+
+    UNPROTECT(1);
+    return(nv);
+  }
+
+
+
+
+  /************************************************************************/
+  // Given a data vector d (positive values) and a set of signed center
+  // coordinates pos, returns coordinates of the data points relative to the
+  // centers.
+  //
+  // d_R    - integer vector of data positions; the sweep assumes ascending order
+  // pos_R  - integer vector of signed centers; the sweep assumes ascending
+  //          order of their absolute values
+  // size_R - integer; half-size of the region around each center
+  //
+  // Returns list(x, i): x holds d - |center| for every (point, center) pair
+  // with |d - |center|| <= size, negated when the center is negative; i holds
+  // the 1-based index of the center each x refers to.
+  SEXP get_relative_coordinates(SEXP d_R,
+                                SEXP pos_R,
+                                SEXP size_R)
+  {
+    int *d, *pos; 
+    int npos,nd,size;
+
+    d = INTEGER(d_R); pos = INTEGER(pos_R);
+    npos=LENGTH(pos_R);  nd=LENGTH(d_R);
+    size = INTEGER(size_R)[0];
+#ifdef DEBUG  
+    Rprintf("|d|=%d, |c|=%d, size=%d\n",nd,npos,size);
+#endif
+
+    vector<int> x; vector<int> xi;
+    int k=0; // current pos index
+
+    // with no centers there is nothing to match (and no pos[0] to read)
+    for(int i=0;npos>0 && i<nd;i++) {
+      // increment k until pos[k]+size>=d[i]
+      while((abs(pos[k])+size) < d[i]) { k++; if(k==npos) { break; };
+#ifdef DEBUG  
+        Rprintf("advancing k to %d\n",k);
+#endif
+      }
+      if(k==npos) { break; };
+      // increment i until d[i]>=pos[k]-size
+      while((abs(pos[k])-size) > d[i]) { i++; if(i==nd) { break; }
+#ifdef DEBUG  
+        Rprintf("advancing i to %d\n",i);
+#endif
+      }
+      if(i==nd) { break; }
+
+      // l is one past the last center whose region starts at or before d[i]
+      int l=k;
+      while((l<npos) && ((abs(pos[l])-size) <= d[i])) { l++; 
+#ifdef DEBUG  
+        Rprintf("advancing l to %d\n",l);
+#endif
+      }
+      for(int j=k;j<l;j++) {
+        int pd=d[i]-abs(pos[j]);
+        // After i was advanced above, d[i] may already lie beyond the region
+        // of center j while still being inside the region of a later center,
+        // so an out-of-range center is skipped rather than ending the scan.
+        if(abs(pd)>size) { continue; }
+        // record
+        if(pos[j]>0) {
+          x.push_back(pd);
+        } else {
+          x.push_back(-1*pd);
+        }
+        xi.push_back(j);
+#ifdef DEBUG  	
+        Rprintf("recorded i=%d, j=%d\n",i,j);
+#endif
+      }
+    }
+
+    SEXP xv_R,xiv_R;
+    PROTECT(xv_R=allocVector(INTSXP,x.size())); 
+    PROTECT(xiv_R=allocVector(INTSXP,x.size())); 
+    int* xv=INTEGER(xv_R);
+    int* xiv=INTEGER(xiv_R);
+
+    int i=0;
+    for(vector<int> ::const_iterator pi=x.begin();pi!=x.end();++pi) {
+      xv[i++]=*pi;
+    }
+    i=0;
+    for(vector<int> ::const_iterator pi=xi.begin();pi!=xi.end();++pi) {
+      xiv[i++]=1+(*pi); // convert to R's 1-based indexing
+    }
+
+    SEXP ans_R, names_R;
+    PROTECT(names_R = allocVector(STRSXP, 2));
+    SET_STRING_ELT(names_R, 0, mkChar("x"));
+    SET_STRING_ELT(names_R, 1, mkChar("i"));
+
+    PROTECT(ans_R = allocVector(VECSXP, 2));
+    SET_VECTOR_ELT(ans_R, 0, xv_R);
+    SET_VECTOR_ELT(ans_R, 1, xiv_R);
+    setAttrib(ans_R, R_NamesSymbol, names_R);
+
+    UNPROTECT(4);
+    return(ans_R);
+  }
+
+
+  // Determines which fragments contain each point.
+  // note: all vectors are integers and are swept in ascending order
+  //
+  // x_R  - vector of point positions
+  // se_R - vector of fragment start and end positions
+  // fi_R - vector of signed fragment indices, parallel to se_R: a positive
+  //        value opens that fragment, a negative value closes fragment -value
+  // return_list_R         - return, for each point, the vector of fragments
+  //                         containing it (elements stay NULL when there are none)
+  // return_unique_R       - treat points lying in more than one fragment as
+  //                         lying in none
+  // return_point_counts_R - return, for each fragment, the number of points
+  //                         it contains (vector of length(se_R)/2)
+  //
+  // return_point_counts takes precedence over return_list. With neither set,
+  // an integer vector is returned giving for each point the lowest-numbered
+  // containing fragment, or -1.
+  // A boundary is applied before a point only when it lies strictly below it.
+  SEXP points_within(SEXP x_R,SEXP se_R,SEXP fi_R,SEXP return_list_R,SEXP return_unique_R,SEXP return_point_counts_R) {
+#ifdef DEBUG  
+    Rprintf("start\n");
+#endif
+    int* x=INTEGER(x_R);
+    int nx=LENGTH(x_R);
+    int* se=INTEGER(se_R);
+    int* fi=INTEGER(fi_R);
+    int nf=LENGTH(se_R);
+
+    int return_list=*(INTEGER(return_list_R));
+    int return_unique=*(INTEGER(return_unique_R));
+    int return_point_counts=*(INTEGER(return_point_counts_R));
+
+#ifdef DEBUG  
+    Rprintf("nf=%d; nx=%d, return_list=%d, return_unique=%d, return_point_counts=%d\n",nf/2,nx,return_list,return_unique,return_point_counts);
+#endif
+    set<int> fset; // fragments open at the current sweep position
+
+    SEXP nv; int *i_nv=NULL;
+    int np=0;
+    if(return_point_counts) {
+      PROTECT(nv = allocVector(INTSXP, nf/2)); np++;      
+      i_nv=INTEGER(nv);
+      for(int i=0;i<nf/2;i++) { i_nv[i]=0; }
+    } else if(return_list) {
+      PROTECT(nv = allocVector(VECSXP, nx)); np++;
+    } else {
+      PROTECT(nv=allocVector(INTSXP,nx));  np++;
+      i_nv=INTEGER(nv);
+    }
+
+    int j=0;
+
+    for(int i=0;i<nx;i++) {
+      // apply every fragment boundary that lies before this point
+      while(j<nf && se[j]<x[i]) {
+        int frag=fi[j];
+        if(frag>0) { // insert
+          fset.insert(frag);
+#ifdef DEBUG  
+          Rprintf("inserted frag %d, size=%d\n",frag,(int) fset.size());
+#endif
+        } else { // remove
+          fset.erase(-frag);
+#ifdef DEBUG  
+          Rprintf("removed frag %d, size=%d\n",-frag,(int) fset.size());
+#endif
+        }
+        j++;
+      }
+#ifdef DEBUG  
+      Rprintf("i=%d j=%d\n",i,j);
+#endif
+      // the mode is picked in the same order as the allocation above, so the
+      // result vector always has the type that is written to
+      if(return_point_counts) {
+        for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) {
+          int idx=*ki-1;
+          // ignore fragment indices that do not fit the count vector
+          if(idx>=0 && idx<nf/2) { i_nv[idx]++; }
+        }
+      } else if(return_list) {
+        if(fset.empty() || (return_unique && fset.size()>1)) {
+          // element is left as NULL
+        } else {
+          SEXP fil_R;
+          PROTECT(fil_R=allocVector(INTSXP,fset.size()));
+          int* fil=INTEGER(fil_R);
+          int k=0;
+          for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) {
+            fil[k]=*ki; k++;
+          }
+          SET_VECTOR_ELT(nv, i, fil_R);
+          UNPROTECT(1); // fil_R is now reachable from nv
+        }
+      } else {
+        if(fset.empty() || (return_unique && fset.size()>1)) {
+          i_nv[i]=-1;
+        } else {
+          i_nv[i]=*fset.begin();
+        }
+      }
+    }
+
+    UNPROTECT(np);
+    return(nv);
+  }
+
+
+  // Evaluates, at regularly spaced positions, the score
+  //   (1-n)*log(lambda) - lambda*(n*(cpos+1) - sum(x))
+  // over the n points x with cpos-mdist <= x <= cpos.
+  //
+  // Returns a numeric vector with one score for each of the positions
+  // spos, spos+step, ..., spos+nsteps*step where nsteps=(epos-spos)/step
+  // (integer division).
+  //
+  // Peak output is not implemented: requesting it (return_peaks != 0) raises
+  // an R error, and min_peak_lr_R is accepted but unused. A zero step also
+  // raises an R error.
+  SEXP expuni_lr(SEXP x_R,      // positions and their number (assumed sorted in ascending order)
+                 SEXP mdist_R,  // max distance at which points should be considered
+                 SEXP lambda_R,  // lambda value
+                 SEXP spos_R,  // starting position
+                 SEXP epos_R,  // ending position
+                 SEXP step_R,  // step size
+                 SEXP return_peaks_R, // whether peak positions should be returned, or entire score vector
+                 SEXP min_peak_lr_R // min peak height (lr)
+                 ) 
+  {
+
+#ifdef DEBUG  
+    Rprintf("start\n");
+#endif
+    int* x=INTEGER(x_R);
+    int nx=LENGTH(x_R);
+    int mdist=INTEGER(mdist_R)[0];
+    double lambda=*(REAL(lambda_R));
+
+    int return_peaks=*(INTEGER(return_peaks_R));
+
+    int spos=*(INTEGER(spos_R));
+    int epos=*(INTEGER(epos_R));
+    int step=*(INTEGER(step_R));
+
+    // fail cleanly instead of writing through an unallocated result vector
+    if(return_peaks) {
+      Rf_error("expuni_lr: returning peak positions is not implemented");
+    }
+    if(step==0) {
+      Rf_error("expuni_lr: step must be non-zero");
+    }
+
+    int nsteps=(int) (epos-spos)/step;
+
+#ifdef DEBUG  
+    Rprintf("n=%d; lambda=%f; mdist=%d; spos=%d; epos=%d; step=%d; nsteps=%d\n",nx,lambda,mdist,spos,epos,step,nsteps);
+#endif
+
+    SEXP nv;
+    PROTECT(nv=allocVector(REALSXP,nsteps+1)); 
+    double *d_nv=REAL(nv);
+
+    int i=0; // current index of the first point being used in the calculations
+    int j=0; // current index one past the last point being used
+    // running sum of the positions inside the window; kept as a double so
+    // that many large coordinates cannot overflow an int
+    double sx=0;
+    int n=0; // number of points inside the window
+
+    for(int k=0; k<=nsteps; k++) {
+      int cpos=spos+k*step;
+      // drop points that fell behind cpos-mdist
+      while(i<nx && x[i]<(cpos-mdist)) {
+        n--; sx-=x[i]; i++;
+      }
+
+      // add points up to and including cpos
+      while(j<nx && x[j]<=cpos) {
+        n++; sx+=x[j]; j++;
+      }
+
+      // calculate lr
+      d_nv[k]=((double)(1-n))*log(lambda)-lambda*(((double)n)*((double)(cpos+1))-sx);
+    }
+    UNPROTECT(1);
+    return(nv);
+  }
+
+
+  // Returns all pairwise distances x[j]-x[i] (j>i) that do not exceed
+  // max_dist. For each i the scan over j stops at the first distance above
+  // max_dist, so x is expected to be sorted in ascending order.
+  //
+  // x_R        - numeric vector of positions
+  // max_dist_R - numeric; largest distance to report
+  SEXP allpdist(SEXP x_R,SEXP max_dist_R) {
+
+#ifdef DEBUG  
+    Rprintf("start\n");
+#endif
+    double* x=REAL(x_R);
+    int nx=LENGTH(x_R);
+    double max_dist=*REAL(max_dist_R);
+#ifdef DEBUG  
+    Rprintf("n=%d; max_dist=%d\n",nx,max_dist);
+#endif
+
+    vector<double> gaps;
+
+    for(int a=0;a<nx;a++) {
+      int b=a+1;
+      while(b<nx) {
+        const double gap=x[b]-x[a];
+#ifdef DEBUG  
+        Rprintf("i=%d; j=%d; d=%f\n",a,b,gap);
+#endif
+        // first gap that is not within range ends the scan for this point
+        if(!(gap<=max_dist)) { break; }
+        gaps.push_back(gap);
+        b++;
+      }
+    }
+
+    // copy the collected distances into an R numeric vector
+    SEXP nv;
+    PROTECT(nv=allocVector(REALSXP,gaps.size())); 
+    double* out=REAL(nv);
+    for(vector<double>::size_type g=0; g<gaps.size(); g++) {
+      out[g]=gaps[g];
+    }
+
+    UNPROTECT(1);
+    return(nv);
+  }
+
+  // allxpdist - collect the distances y[j]-x[i] between two point sets that
+  // fall into the interval [-max_dist, max_dist].
+  //
+  //   x_R, y_R   - REAL vectors of positions. The start index into y only
+  //                ever moves forward and the inner scan stops at the first
+  //                distance above max_dist, so both vectors have to be sorted
+  //                in ascending order for the result to be complete.
+  //   max_dist_R - REAL scalar, largest absolute distance to report.
+  //
+  // Returns a REAL vector with the signed distances in the order they were
+  // found (by i, then by j).
+  SEXP allxpdist(SEXP x_R,SEXP y_R, SEXP max_dist_R) {
+
+#ifdef DEBUG
+    Rprintf("start\n");
+#endif
+    double* x=REAL(x_R);
+    double* y=REAL(y_R);
+    int nx=LENGTH(x_R);
+    int ny=LENGTH(y_R);
+    double max_dist=*REAL(max_dist_R);
+#ifdef DEBUG
+    // max_dist is a double and therefore has to be printed with %f
+    Rprintf("nx=%d; ny=%d; max_dist=%f\n",nx,ny,max_dist);
+#endif
+
+    vector<double> dist;
+    int yi=0; // latest y start index
+
+    for(int i=0;i<nx;i++) {
+      // advance yi until y[yi]>=x[i]-max_dist; the index is checked before
+      // the element is read so that y is never accessed past its end
+      while(yi<ny && y[yi]<(x[i]-max_dist)) { yi++; }
+      // no y left that could be close enough to this or any later x
+      if(yi==ny) { break; }
+
+      for(int j=yi;j<ny;j++) {
+        double d=y[j]-x[i];
+#ifdef DEBUG
+        Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
+#endif
+        if(d<=max_dist) {
+          dist.push_back(d);
+        } else {
+          break;
+        }
+      }
+    }
+
+    // copy the collected distances into an R vector
+    SEXP nv;
+    PROTECT(nv=allocVector(REALSXP,dist.size()));
+    double* i_nv=REAL(nv);
+    int i=0;
+    for(vector<double> ::const_iterator pi=dist.begin();pi!=dist.end();++pi) {
+      i_nv[i++]=*pi;
+    }
+
+    UNPROTECT(1);
+    return(nv);
+  }
+
+  // nwithindist - for each point, count the other points that lie within
+  // max_dist of it.
+  //
+  //   x_R        - REAL vector of positions. The inner scan stops at the first
+  //                pair that is farther apart than max_dist, so the counts are
+  //                complete only when x is sorted in ascending order.
+  //   max_dist_R - REAL scalar, largest distance that still counts.
+  //
+  // Returns a REAL vector of the same length as x holding the counts
+  // (the point itself is not counted).
+  SEXP nwithindist(SEXP x_R,SEXP max_dist_R) {
+
+#ifdef DEBUG
+    Rprintf("start\n");
+#endif
+    double* x=REAL(x_R);
+    int nx=LENGTH(x_R);
+    double max_dist=*REAL(max_dist_R);
+
+    SEXP nv;
+    PROTECT(nv=allocVector(REALSXP,nx));
+    double* i_nv=REAL(nv);
+    for(int i=0;i<nx;i++) { i_nv[i]=0; }
+
+#ifdef DEBUG
+    // max_dist is a double and therefore has to be printed with %f
+    Rprintf("n=%d; max_dist=%f\n",nx,max_dist);
+#endif
+
+    for(int i=0;i<nx;i++) {
+      for(int j=i+1;j<nx;j++) {
+
+        double d=x[j]-x[i];
+#ifdef DEBUG
+        Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
+#endif
+        if(d<=max_dist) {
+          // a close pair counts once for each of its two members
+          i_nv[i]++;
+          i_nv[j]++;
+        } else {
+          break;
+        }
+      }
+    }
+
+    UNPROTECT(1);
+    return(nv);
+  }
+
+
+
+
+  // find_poisson_enrichment_clusters - slide a window of width wsize over a
+  // merged tag list and report contiguous clusters of windows that pass the
+  // enrichment test.
+  //
+  //   pos_R    - REAL vector of tag positions; the window boundaries only move
+  //              forward, so the positions have to be sorted ascending
+  //   flag_R   - INTEGER vector parallel to pos_R; each value is used directly
+  //              as an index into the two window counters and therefore has
+  //              to be 0 or 1 (presumably 1 = signal tag, 0 = background tag;
+  //              confirm against the R caller)
+  //   wsize_R  - INTEGER scalar, window width
+  //   thr_R    - REAL scalar, z threshold entering the lower-bound estimate
+  //   mcs_R    - INTEGER scalar, minimal cluster length (end-start) to report
+  //   bgm_R    - REAL scalar, value the lower bound has to reach
+  //   mintag_R - REAL scalar, minimal number of flag==1 tags in the window
+  //   either_R - INTEGER flag; non-zero: a window qualifies when either of
+  //              the two criteria holds, zero: both have to hold
+  //
+  // Returns list(s=<cluster starts>, e=<cluster ends>), both REAL vectors.
+  SEXP find_poisson_enrichment_clusters(SEXP pos_R,SEXP flag_R,SEXP wsize_R,SEXP thr_R,SEXP mcs_R,SEXP bgm_R,SEXP mintag_R,SEXP either_R) {
+
+#ifdef DEBUG
+    Rprintf("start\n");
+#endif
+    double* pos=REAL(pos_R);
+    int* flag=INTEGER(flag_R);
+    int nt=LENGTH(pos_R);
+
+    int mcs=*INTEGER(mcs_R);
+    int wsize=*INTEGER(wsize_R);
+    int either=*INTEGER(either_R);
+    double thr=REAL(thr_R)[0];
+    double bgm=REAL(bgm_R)[0];
+    double mintag=REAL(mintag_R)[0];
+
+#ifdef DEBUG
+    Rprintf("nt=%d; wsize=%d; thr=%f; mcs=%d; min.tag=%f; bgm=%f\n",nt,wsize,thr,mcs,mintag,bgm);
+#endif
+
+
+    vector< pair<double,double> > contigs;
+
+    // running indices (start and end) of the tags inside the window
+    int si=0;
+    int ei=0;
+
+    // current window start coordinate; set from pos[0] only once it is known
+    // that there is at least one tag
+    double ws=0;
+
+    // current window tag counts, indexed by flag value
+    int cc[2]={0,0};
+
+
+    if(nt>0) {
+      ws=pos[0];
+      cc[flag[si]]++;
+      // increment window end
+      while(ei<(nt-1) && (pos[ei+1]-ws) <= wsize) {
+        ei++;
+        cc[flag[ei]]++;
+      }
+
+
+      // cluster start,end positions (only meaningful while inclust is set)
+      double cs=0,ce=0;
+      int inclust=0;
+
+      while(si<nt-1) {
+
+        // decide which boundary is hit first when the window is moved right;
+        // ei is tested first so that pos[ei+1] is never read past the last tag
+        if(ei!=(nt-1) && (pos[si+1]-ws) > (pos[ei+1] - ws - wsize)) {
+          // move end boundary
+          ei++;
+          ws=pos[ei]-wsize;
+          cc[flag[ei]]++;
+          // take in all tags sharing the new end position
+          while(ei<(nt-1) && pos[ei+1]==ws+wsize) {
+            ei++;
+            cc[flag[ei]]++;
+          }
+
+          // increment window start
+          while(si<(nt-1) && pos[si] < ws) {
+            cc[flag[si]]--;
+            si++;
+          }
+
+        } else {
+          // move up start boundary
+          ws=pos[si+1];
+          cc[flag[si]]--;
+          si++;
+          // drop all tags sharing the previous start position
+          while(si<(nt-1) && pos[si+1]==ws) {
+            cc[flag[si]]--;
+            si++;
+          }
+
+          // increment window end
+          while(ei<(nt-1) && (pos[ei+1] - ws) <= wsize) {
+            ei++;
+            cc[flag[ei]]++;
+          }
+
+        }
+
+        // calculate z score based lower bound; 0.5 is added to both counts
+        double dc0=((double)cc[0])+0.5;
+        double dc1=((double)cc[1])+0.5;
+        double rte=dc0+dc1-0.25*thr*thr;
+        double lb;
+        if(rte<=0) {
+          lb=0;
+        } else {
+          lb=(sqrt(dc1*dc0) - 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr);
+          if(lb<0) { lb=0; }
+          lb*=lb;
+        }
+
+        //Rprintf("%f=f(%f,%f,%f); %f=f(%f,%f,%f)\n",lb,1.0-thr,2.0*dc1,2.0*dc0,ub,thr,2.0*dc1,2.0*dc0);
+
+#ifdef DEBUG
+        //double ub=gsl_cdf_fdist_Qinv(thr,2.0*dc1,2.0*dc0)*dc1/dc0;
+        double ub=(sqrt(dc1*dc0) + 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr);
+        ub*=ub;
+        Rprintf("s=%d (%f); e=%d (%f); window: %f-%f; cc=[%d,%d]; lb=%f; ub=%f\n",si,pos[si],ei,pos[ei],ws,ws+wsize,cc[0],cc[1],lb,ub);
+#endif
+
+        // does the current window pass the enrichment test?
+        int bc=lb>=bgm && cc[1]>=mintag;
+        if(either) {
+          bc=lb>=bgm || cc[1]>=mintag;
+        }
+        if(bc) {
+          if(inclust) {
+            // window centre is the candidate for the new cluster end
+            double nce=ws+wsize/2.0;
+            if(nce-ce > wsize/2.0) {
+              // next point is too far removed, end cluster
+              if(ce-cs >= mcs) {
+                contigs.push_back(pair<double,double>(cs,ce));
+#ifdef DEBUG
+                Rprintf("recorded cluster %f-%f\n",cs,ce);
+#endif
+              }
+              inclust=0;
+            } else {
+              ce=nce;
+            }
+          } else {
+            inclust=1;
+            cs=ws+wsize/2.0;
+            ce=cs;
+          }
+        } else {
+          if(inclust) {
+            if(ce-cs >= mcs) {
+              contigs.push_back(pair<double,double>(cs,ce));
+#ifdef DEBUG
+              Rprintf("recorded cluster %f-%f\n",cs,ce);
+#endif
+            }
+            inclust=0;
+          }
+        }
+
+      }
+
+      // close a cluster that is still open after the last window
+      if(inclust) {
+        if(ce-cs >= mcs) {
+          contigs.push_back(pair<double,double>(cs,ce));
+#ifdef DEBUG
+          Rprintf("recorded cluster %f-%f\n",cs,ce);
+#endif
+        }
+        inclust=0;
+      }
+    }
+
+    // copy the clusters into two parallel R vectors
+    SEXP cs_R,ce_R;
+    PROTECT(cs_R=allocVector(REALSXP,contigs.size()));
+    PROTECT(ce_R=allocVector(REALSXP,contigs.size()));
+    double* csa=REAL(cs_R);
+    double* cea=REAL(ce_R);
+
+    int i=0;
+    for(vector< pair<double,double> >::const_iterator ci=contigs.begin(); ci!=contigs.end();++ci) {
+      csa[i]=ci->first;
+      cea[i]=ci->second;
+      i++;
+    }
+
+    SEXP ans_R, names_R;
+    PROTECT(names_R = allocVector(STRSXP, 2));
+    SET_STRING_ELT(names_R, 0, mkChar("s"));
+    SET_STRING_ELT(names_R, 1, mkChar("e"));
+
+    PROTECT(ans_R = allocVector(VECSXP, 2));
+    SET_VECTOR_ELT(ans_R, 0, cs_R);
+    SET_VECTOR_ELT(ans_R, 1, ce_R);
+    setAttrib(ans_R, R_NamesSymbol, names_R);
+
+    UNPROTECT(4);
+    return(ans_R);
+
+  }
+
+
+  // region_intersection - find the intersection (or union) of several sets
+  // of regions given as one merged list of segment boundaries.
+  //
+  //   n_R       - INTEGER scalar, number of region sets
+  //   pos_R     - REAL vector of boundary positions, in the order in which
+  //               they are to be processed
+  //   flags_R   - INTEGER vector parallel to pos_R; +k marks the start and -k
+  //               the end of a segment of the k-th region set (1<=k<=n)
+  //   vals_R    - REAL vector parallel to pos_R, read only when max_val!=0
+  //   max_val_R - INTEGER scalar: positive - report the largest value seen in
+  //               a fragment, negative - report the smallest, 0 - ignore values
+  //   union_R   - INTEGER flag: non-zero - a fragment lasts while at least one
+  //               set has an open segment, zero - while every set has one
+  //
+  // Returns list(s=<starts>, e=<ends>) with an additional v=<values> element
+  // when max_val!=0. Raises an R error if a flag is 0 or refers to a set
+  // outside 1..n.
+  SEXP region_intersection(SEXP n_R,SEXP pos_R,SEXP flags_R,SEXP vals_R,SEXP max_val_R,SEXP union_R) {
+    const int max_val=*INTEGER(max_val_R);
+    const int unionr=*INTEGER(union_R);
+    const int n=*INTEGER(n_R);
+    const int npos=LENGTH(pos_R);
+    double* pos=REAL(pos_R);
+    int* flags=INTEGER(flags_R);
+    double* val=REAL(vals_R);
+
+#ifdef DEBUG
+    Rprintf("n=%d; npos=%d; max_val=%d\n",n,npos,max_val);
+#endif
+
+    // validate the flags up front: an out of range flag would otherwise index
+    // outside of the per-set status array. This is done before any C++
+    // container exists, because error() does not run destructors.
+    for(int i=0;i<npos;i++) {
+      const int k=abs(flags[i]);
+      if(k<1 || k>n) {
+        error("region_intersection: flag %d at index %d does not refer to one of the %d region sets",flags[i],i,n);
+      }
+    }
+
+    // number of currently open segments for each set
+    vector<int> s(n>0 ? n : 0, 0);
+
+    vector<double> starts;
+    vector<double> ends;
+    vector<double> values;
+
+    int start=-1; // index of the boundary that opened the current fragment
+    double mval=0; // max/min value within the current fragment
+    for(int i=0;i<npos;i++) {
+      // update flags
+      int f=flags[i];
+      if(f>0) {
+        s[abs(f)-1]++;
+      } else {
+        s[abs(f)-1]--;
+      }
+
+      // multiplying by max_val turns the comparison into "smaller than"
+      // when the minimum is requested
+      if(max_val!=0 && val[i]*max_val > mval*max_val) { mval=val[i]; }
+
+      // joined status
+      int all;
+      if(unionr) {
+        all=0;
+        for(int j=0;j<n;j++) { if(s[j]>0) { all=1; break;} }
+      } else {
+        all=1;
+        for(int j=0;j<n;j++) { all=all & (s[j]>0); }
+      }
+
+
+      //Rprintf("i=%d; s=[",i);
+      //for(int j=0;j<n;j++) { Rprintf("%d",s[j]); }
+      //Rprintf("]; all=%d; start=%d\n",all,start);
+
+      if(start>=0) {
+        // in fragment
+        if(!all) {
+          // end fragment
+          starts.push_back(pos[start]);
+          ends.push_back(pos[i]);
+          if(max_val!=0) { values.push_back(mval); }
+
+#ifdef DEBUG
+          // printed before start is reset, pos[start] is still valid here
+          Rprintf("recorded new fragment (s=%f,e=%f,v=%f);\n",pos[start],pos[i],mval);
+#endif
+          start=-1;
+        }
+      } else {
+        // should a fragment be started?
+        if(all) {
+          start=i;
+          if(max_val!=0) { mval=val[i]; }
+#ifdef DEBUG
+          Rprintf("starting new fragment (s=%f,i=%d);\n",pos[start],i);
+#endif
+        }
+      }
+    }
+
+    // build the result list; nprot tracks how many objects are protected
+    const int nel=(max_val!=0) ? 3 : 2;
+    int nprot=0;
+
+    SEXP cs_R,ce_R,cv_R=R_NilValue;
+    PROTECT(cs_R=allocVector(REALSXP,starts.size())); nprot++;
+    PROTECT(ce_R=allocVector(REALSXP,ends.size())); nprot++;
+
+    double* csa=REAL(cs_R);
+    int i=0;
+    for(vector<double>::const_iterator ci=starts.begin(); ci!=starts.end(); ++ci) {
+      csa[i]=*ci; i++;
+    }
+
+    csa=REAL(ce_R);
+    i=0;
+    for(vector<double>::const_iterator ci=ends.begin(); ci!=ends.end(); ++ci) {
+      csa[i]=*ci; i++;
+    }
+
+    if(max_val!=0) {
+      PROTECT(cv_R=allocVector(REALSXP,values.size())); nprot++;
+      csa=REAL(cv_R);
+      i=0;
+      for(vector<double>::const_iterator ci=values.begin(); ci!=values.end(); ++ci) {
+        csa[i]=*ci; i++;
+      }
+    }
+
+    SEXP ans_R, names_R;
+    PROTECT(names_R = allocVector(STRSXP, nel)); nprot++;
+    SET_STRING_ELT(names_R, 0, mkChar("s"));
+    SET_STRING_ELT(names_R, 1, mkChar("e"));
+    if(max_val!=0) {
+      SET_STRING_ELT(names_R, 2, mkChar("v"));
+    }
+
+    PROTECT(ans_R = allocVector(VECSXP, nel)); nprot++;
+    SET_VECTOR_ELT(ans_R, 0, cs_R);
+    SET_VECTOR_ELT(ans_R, 1, ce_R);
+    if(max_val!=0) {
+      SET_VECTOR_ELT(ans_R, 2, cv_R);
+    }
+
+    setAttrib(ans_R, R_NamesSymbol, names_R);
+
+    UNPROTECT(nprot);
+    return(ans_R);
+  }
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/wdl.cpp	Tue Nov 27 16:13:29 2012 -0500
@@ -0,0 +1,657 @@
+#include <vector>
+#include <string.h>
+#include <iostream>
+#include <string>
+#include <set>
+
+extern "C" {
+#include "R.h"
+#include "Rmath.h"
+#include "Rinternals.h"
+#include "Rdefines.h"
+}
+
+using namespace std;
+using namespace __gnu_cxx; 
+
+//#define DEBUG 1
+
+extern "C" {
+
+  /************************************************************************/
+  /*
+   * lwcc - calculate local window cross-correlation
+   *
+   * For every position i the tag counts in the two flanking windows
+   * [i-osize, i-isize] and [i+isize, i+osize] are used to compute the
+   * correlation between the positive strand profile and the mirrored
+   * negative strand profile. The correlation is then scaled by a tag count
+   * based weight. Positions where either histogram holds -1 are treated as
+   * masked and skipped.
+   *
+   * Returns either a REAL vector of length(x) with the value for each
+   * position (positions that are not evaluated are left at 0), or, if
+   * return_peaks is set, list(x=<0-based peak positions>, v=<peak values>).
+   *
+   * bgp_R/bgn_R/bg_wsize_R/bg_weight_R are accessed even when bg_subtract is
+   * 0, so valid vectors have to be supplied in every case.
+   */
+
+  SEXP lwcc(SEXP x_R, // positive strand hist
+            SEXP y_R, // negative strand hist of the same length
+            SEXP osize_R,       // outer boundary distance
+            SEXP isize_R,        // inner boundary distance
+            SEXP return_peaks_R, // whether all correlation values, or just peaks should be returned
+            SEXP min_peak_dist_R, // distance between closest peaks
+            SEXP min_peak_val_R, // min peak threshold
+            SEXP tag_weight_R,  // tag weight
+            SEXP bg_subtract_R, // a flag whether to do background subtraction
+            SEXP bgp_R, // optional background hist for positive strand
+            SEXP bgn_R, // optional background hist for negative strand
+            SEXP bg_wsize_R, // window size for the background counts
+            SEXP bg_weight_R, // optional weighting for the background tags, must compensate for window size difference (including is cutout)
+            SEXP round_up_R // whether to round up fractional signal tag counts
+            )
+  {
+
+#ifdef DEBUG
+    Rprintf("start\n");
+#endif
+
+    int is=INTEGER(isize_R)[0];
+    int os=INTEGER(osize_R)[0];
+    double rs=((double)(2*os+1));
+    int* x=INTEGER(x_R);
+    int* y=INTEGER(y_R);
+    int n_x=LENGTH(x_R);
+
+    // background-related
+    int* bgp=INTEGER(bgp_R);
+    int* bgn=INTEGER(bgn_R);
+    int bg_whs=INTEGER(bg_wsize_R)[0];
+
+    int return_peaks=*(INTEGER(return_peaks_R));
+    double min_peak_val=*(REAL(min_peak_val_R));
+    int min_peak_dist=*(INTEGER(min_peak_dist_R));
+    double tag_weight=*(REAL(tag_weight_R));
+
+    const int round_up=*(INTEGER(round_up_R));
+    const int bg_subtract=*(INTEGER(bg_subtract_R));
+    const double bg_weight=*(REAL(bg_weight_R));
+
+    int i; // point at which the value is being calculated
+    int start=os;
+    int end=n_x-os-1;
+
+    // bg tag counts within bg window
+    // (*1: window ending at the current position, *2: window starting at it)
+    int bg_pn1=0;
+    int bg_nn1=0;
+    int bg_pn2=0;
+    int bg_nn2=0;
+
+
+
+    // illustration for counting:
+    //
+    // 012345678901234567890123456789012
+    // ==========------|------==========
+    //
+    //  osize=16; isize=6;
+
+
+    SEXP nv;
+    double *d_nv;
+    vector<int> ppos;
+    vector<double> pval;
+    if(!return_peaks) {
+      PROTECT(nv=allocVector(REALSXP,n_x));
+      d_nv=REAL(nv);
+      for(int i=0;i<n_x;i++) {
+        d_nv[i]=0;
+      }
+    }
+
+#ifdef DEBUG
+    Rprintf("start=%d end=%d tag_weight=%f\n", start,end,tag_weight);
+    Rprintf("x[1]=%d x[2]=%d y[1]=%d y[2]=%d\n",x[1],x[2],y[1],y[2]);
+#endif
+
+    int lpp=-1; // last peak position
+    double lpv=-1e3; // last peak value
+
+    double ppv=-1e3; // last value
+    double pppv=-11e-3; // value before last
+
+    // signal tag counts: pn1/nn2 come from the left window, pn2/nn1 from the
+    // right window (p = positive strand, n = negative strand)
+    int pn1,pn2,nn1,nn2;
+
+
+    if(bg_subtract) {
+      // pre-initialize background tag counts,
+      for(int i=0;i<bg_whs;i++) {
+        if(i<n_x) {
+          bg_pn2+=bgp[i];
+          bg_nn2+=bgn[i];
+        }
+      }
+    }
+
+
+    // the loop starts at 0 (not at start) so that the running background
+    // counts are up to date once the first evaluated position is reached
+    for(i=0;i<end;i++) {
+#ifdef DEBUG
+      //Rprintf("i=%d ", i);
+#endif
+
+      if(bg_subtract) {
+        // update background counts
+        int nl=i-bg_whs-1;
+
+        if(nl>=0) {
+          bg_pn1-=bgp[nl];
+          bg_nn1-=bgn[nl];
+        }
+        bg_pn1+=bgp[i];
+        bg_nn1+=bgn[i];
+
+        if(i>0) {
+          bg_pn2-=bgp[i-1];
+          bg_nn2-=bgn[i-1];
+        }
+        int nr=i+bg_whs;
+        if(nr<n_x) {
+          bg_pn2+=bgp[nr];
+          bg_nn2+=bgn[nr];
+        }
+      }
+
+      if(i >= start) {
+        // update counts, taking into account masked out regions
+        pn1=pn2=nn1=nn2=0;
+
+        for(int k=0;k<=(os-is);k++) {
+          int xp1=x[i-os+k];
+          int xp2=x[i+os-k];
+          int xn1=y[i+os-k];
+          int xn2=y[i-os+k];
+
+          if(xp1!=-1 && xn1!=-1) {
+            pn1+=xp1;
+            nn1+=xn1;
+          }
+          if(xp2!=-1 && xn2!=-1) {
+            pn2+=xp2;
+            nn2+=xn2;
+          }
+        }
+
+        // calculate the means; the negative strand mean is based on the
+        // negative strand counts (it used to repeat the positive strand sum)
+        double mp=((double)(pn1+pn2))/rs;
+        double mn=((double)(nn1+nn2))/rs;
+#ifdef DEBUG
+        Rprintf("mp=%f mn=%f\n",mp,mn);
+#endif
+        // calculate correlation
+        double varp=0;
+        double varn=0;
+        double num=0;
+        double val=-1e3; // stays at -1e3 when one of the strands has no tags
+        if(mp>0 && mn>0) {
+          for(int k=0;k<=(os-is);k++) {
+            int xp1=x[i-os+k];
+            int xp2=x[i+os-k];
+            int xn1=y[i+os-k];
+            int xn2=y[i-os+k];
+
+
+            if(xp1!=-1 && xn1!=-1) {
+              double nnp1=((double) xp1)-mp;
+              double nnn1=((double) xn1)-mn;
+              num+=nnp1*nnn1;
+              varp+=nnp1*nnp1;
+              varn+=nnn1*nnn1;
+            }
+
+            if(xp2!=-1 && xn2!=-1) {
+              double nnp2=((double) xp2)-mp;
+              double nnn2=((double) xn2)-mn;
+              num+=nnp2*nnn2;
+              varp+=nnp2*nnp2;
+              varn+=nnn2*nnn2;
+            }
+
+          }
+          double tagw;
+          double spn1=((double)pn1)*tag_weight;
+          double snn1=((double)nn1)*tag_weight;
+          double spn2=((double)pn2)*tag_weight;
+          double snn2=((double)nn2)*tag_weight;
+          if(round_up) {
+            if(pn1>0 && spn1<1) { spn1=1.0; }
+            //if(pn2>0 && spn2<1) { spn2=1.0; }
+            if(nn1>0 && snn1<1) { snn1=1.0; }
+            //if(nn2>0 && snn2<1) { snn2=1.0; }
+          }
+
+          if(bg_subtract) {
+            // nn1 was counted in the right window and nn2 in the left one,
+            // hence the crossed background indices for the negative strand
+            spn1-=((double)bg_pn1)*bg_weight;
+            snn1-=((double)bg_nn2)*bg_weight;
+            spn2-=((double)bg_pn2)*bg_weight;
+            snn2-=((double)bg_nn1)*bg_weight;
+
+            if(spn2<0) spn2=0;
+            if(snn2<0) snn2=0;
+
+            if(spn1>0 && snn1>0) {
+              tagw=(2.0*sqrt(spn1*snn1)-(spn2+snn2+1.0));
+            } else {
+              tagw=-(spn2+snn2+1.0);
+            }
+            //cout<<"bg_pn1="<<bg_pn1<<"; bg_pn2="<<bg_pn2<<"; bg_nn1="<<bg_nn1<<"; bg_nn2="<<bg_nn2<<endl;
+          } else {
+            tagw=2.0*sqrt(spn1*snn1)-(spn2+snn2);
+          }
+
+          if(tagw<0) {
+            val=0.0;
+          } else {
+            if(num==0.0) {
+              // also avoids 0/0 when one of the variances is zero
+              val=0;
+            } else {
+              val=num/(sqrt(varp*varn));
+            }
+            val=val*sqrt(tagw) + tagw;
+
+          }
+          //cout<<"val="<<val<<endl;
+
+#ifdef DEBUG
+          Rprintf("pn1=%d pn2=%d nn1=%d nn2=%d tag.weight=%f tagw=%f\n",pn1,pn2,nn1,nn2,tag_weight,tagw);
+          Rprintf("tagw=%f varp=%f varn=%f num=%f cor=%f val=%f\n",tagw,varp,varn,num,num/sqrt(varp*varn),val);
+#endif
+        }
+
+
+
+        if(return_peaks) {
+          // determine if previous position was a peak
+          if(ppv>min_peak_val && ppv>val && ppv>pppv) {
+            if(lpp>0 && (i-lpp+1)>min_peak_dist) {
+              // record previous peak position
+              ppos.push_back(lpp);
+              pval.push_back(lpv);
+#ifdef DEBUG
+              Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp));
+#endif
+              lpp=i-1; lpv=ppv;
+#ifdef DEBUG
+              Rprintf("updated peak to x=%d y=%f\n",lpp,lpv);
+#endif
+            } else {
+              // too close to the pending peak: keep the higher of the two
+              if(ppv>lpv) {
+                // update last peak positions
+#ifdef DEBUG
+                Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv);
+#endif
+                lpp=i-1; lpv=ppv;
+              }
+            }
+          }
+
+          // update previous values (runs of equal values count as one)
+          if(val!=ppv) {
+            pppv=ppv; ppv=val;
+          }
+        } else {
+          d_nv[i]=val;
+        }
+      }
+    }
+
+    if(return_peaks) {
+      // record last position
+      if(lpp>0) {
+#ifdef DEBUG
+        Rprintf("recording last peak x=%d y=%f\n",lpp,lpv);
+#endif
+        ppos.push_back(lpp);
+        pval.push_back(lpv);
+      }
+
+      SEXP rpp_R,rpv_R;
+      PROTECT(rpp_R=allocVector(INTSXP,ppos.size()));
+      PROTECT(rpv_R=allocVector(REALSXP,ppos.size()));
+      int* rpp=INTEGER(rpp_R);
+      double* rpv=REAL(rpv_R);
+
+      for(int i=0;i<ppos.size();i++) {
+        rpp[i]=ppos[i];
+        rpv[i]=pval[i];
+      }
+
+      SEXP ans_R, names_R;
+      PROTECT(names_R = allocVector(STRSXP, 2));
+      SET_STRING_ELT(names_R, 0, mkChar("x"));
+      SET_STRING_ELT(names_R, 1, mkChar("v"));
+
+      PROTECT(ans_R = allocVector(VECSXP, 2));
+      SET_VECTOR_ELT(ans_R, 0, rpp_R);
+      SET_VECTOR_ELT(ans_R, 1, rpv_R);
+      setAttrib(ans_R, R_NamesSymbol, names_R);
+
+      UNPROTECT(4);
+      return(ans_R);
+    } else {
+      UNPROTECT(1);
+      return(nv);
+    }
+
+  }
+
+
+
+  /************************************************************************/
+  /*
+   * wtd - window tag difference implementation
+   *
+   * For every position i the tags in the window of wsize positions before i
+   * and in the window of wsize positions starting at i are counted on both
+   * strands. The score rewards positive strand tags upstream together with
+   * negative strand tags downstream (pn1, nn2) and penalizes the opposite
+   * combination (pn2, nn1). With direct_count the score is simply the
+   * weighted pn1+nn2 count.
+   *
+   * Returns either a REAL vector of length(x) with the score for each
+   * position (positions that are not evaluated are left at 0), or, if
+   * return_peaks is set, list(x=<0-based peak positions>, v=<peak values>).
+   *
+   * bgp_R/bgn_R/bg_wsize_R/bg_weight_R are accessed even when bg_subtract is
+   * 0, so valid vectors have to be supplied in every case.
+   */
+
+  SEXP wtd(SEXP x_R, // positive strand hist
+           SEXP y_R, // negative strand hist of the same length
+           SEXP wsize_R,       // outer boundary distance
+           SEXP return_peaks_R, // whether all correlation values, or just peaks should be returned
+           SEXP min_peak_dist_R, // distance between closest peaks
+           SEXP min_peak_val_R, // min peak threshold
+           SEXP direct_count_R, // whether tag weighting should not be done
+           SEXP tag_weight_R,  // tag weight
+           SEXP ignore_masking_R,  // whether to ignore masked regions
+           SEXP bg_subtract_R, // a flag whether to do background subtraction
+           SEXP bgp_R, // optional background hist for positive strand
+           SEXP bgn_R, // optional background hist for negative strand
+           SEXP bg_wsize_R, // window size for the background counts
+           SEXP bg_weight_R, // optional weighting for the background tags, must compensate for window size difference
+           SEXP round_up_R // whether to round up fractional signal tag counts
+           )
+  {
+
+#ifdef DEBUG
+    Rprintf("start\n");
+#endif
+
+    int whs=INTEGER(wsize_R)[0];
+    int* x=INTEGER(x_R);
+    int* y=INTEGER(y_R);
+    int n_x=LENGTH(x_R);
+
+    // background-related
+    int* bgp=INTEGER(bgp_R);
+    int* bgn=INTEGER(bgn_R);
+    int bg_whs=INTEGER(bg_wsize_R)[0];
+
+
+    const int return_peaks=*(INTEGER(return_peaks_R));
+    const int direct_count=*(INTEGER(direct_count_R));
+    const int ignore_masking=*(INTEGER(ignore_masking_R));
+    const double min_peak_val=*(REAL(min_peak_val_R));
+    const int min_peak_dist=*(INTEGER(min_peak_dist_R));
+    const double tag_weight=*(REAL(tag_weight_R));
+
+    const int round_up=*(INTEGER(round_up_R));
+    const int bg_subtract=*(INTEGER(bg_subtract_R));
+    const double bg_weight=*(REAL(bg_weight_R));
+
+    int i; // point at which the value is being calculated
+    int start=whs+1;
+    int end=n_x-whs-1;
+
+    // The running counts below are only consumed by the main loop. When the
+    // histogram is too short for the loop to run at all, the initialization
+    // passes are skipped, because they would read past the end of the input.
+    const bool have_positions=(start<end);
+
+    // tag counts to calculate the means
+    int pn1=0;
+    int pn2=0;
+    int nn1=0;
+    int nn2=0;
+
+    // bg tag counts within bg window
+    // (*1: window ending at the current position, *2: window starting at it)
+    int bg_pn1=0;
+    int bg_pn2=0;
+    int bg_nn1=0;
+    int bg_nn2=0;
+
+    SEXP nv;
+    double *d_nv;
+    vector<int> ppos;
+    vector<double> pval;
+    if(!return_peaks) {
+      PROTECT(nv=allocVector(REALSXP,n_x));
+      d_nv=REAL(nv);
+      for(int i=0;i<n_x;i++) {
+        d_nv[i]=0;
+      }
+    }
+
+#ifdef DEBUG
+    Rprintf("whs=%d start=%d end=%d tag_weight=%f ignore_masing=%d\n", whs, start,end,tag_weight,ignore_masking);
+    Rprintf("x[1]=%d x[2]=%d y[1]=%d y[2]=%d\n",x[1],x[2],y[1],y[2]);
+#endif
+
+    int lpp=-1; // last peak position
+    double lpv=-1000; // last peak value
+
+    double ppv=-1000; // last value
+    int ppl=-1; // position of the last value
+    double pppv=-1000; // value before last
+
+
+    if(ignore_masking==1 && have_positions) {
+      // NOTE(review): this seeds pn2/nn2 with positions whs+1..2*whs, while
+      // the incremental update in the main loop subtracts x[i-1] (x[whs] on
+      // the first pass), which was never added. The window implied by the
+      // update looks like [i, i+whs]; confirm the intended window against
+      // the non-incremental branch before changing it.
+      for(int i=0;i<whs;i++) {
+        pn1+=x[i];
+        pn2+=x[i+whs+1];
+        nn1+=y[i];
+        nn2+=y[i+whs+1];
+
+      }
+    }
+
+    if(bg_subtract && have_positions) {
+      // pre-initialize background tag counts,
+      for(int i=0;i<bg_whs;i++) {
+        if(i<n_x) {
+          bg_pn2+=bgp[i];
+          bg_nn2+=bgn[i];
+        }
+      }
+      // increment center of background count window to the start position
+      for(int i=0;i<start;i++) {
+        // update background counts
+        int nl=i-bg_whs-1;
+
+        if(nl>=0) {
+          bg_pn1-=bgp[nl];
+          bg_nn1-=bgn[nl];
+        }
+        bg_pn1+=bgp[i];
+        bg_nn1+=bgn[i];
+
+        if(i>0) {
+          bg_pn2-=bgp[i-1];
+          bg_nn2-=bgn[i-1];
+        }
+        int nr=i+bg_whs;
+        if(nr<n_x) {
+          bg_pn2+=bgp[nr];
+          bg_nn2+=bgn[nr];
+        }
+      }
+
+    }
+
+
+#ifdef DEBUG
+    // i is not set yet at this point, report the first position instead
+    Rprintf("initialization: i=%d pn1=%d, pn2=%d, nn1=%d, nn2=%d", start,pn1,pn2,nn1,nn2);
+#endif
+
+    for(i=start;i<end;i++) {
+      if(bg_subtract) {
+        // update background counts
+        int nl=i-bg_whs-1;
+
+        if(nl>=0) {
+          bg_pn1-=bgp[nl];
+          bg_nn1-=bgn[nl];
+        }
+        bg_pn1+=bgp[i];
+        bg_nn1+=bgn[i];
+
+        if(i>0) {
+          bg_pn2-=bgp[i-1];
+          bg_nn2-=bgn[i-1];
+        }
+        int nr=i+bg_whs;
+        if(nr<n_x) {
+          bg_pn2+=bgp[nr];
+          bg_nn2+=bgn[nr];
+        }
+      }
+
+      // update counts
+      if(ignore_masking==1) {
+        // incremental update of the running window sums
+        pn1+=x[i-1]-x[i-whs-1];
+        pn2+=x[i+whs]-x[i-1];
+        nn1+=y[i-1]-y[i-whs-1];
+        nn2+=y[i+whs]-y[i-1];
+
+      } else {
+
+        // recount both windows from scratch, skipping masked positions
+        pn1=pn2=nn1=nn2=0;
+
+        for(int k=0;k<whs;k++) {
+          int xp1=x[i-k-1];
+          int xp2=x[i+k];
+          int xn1=y[i-k-1];
+          int xn2=y[i+k];
+
+          // omit masked positions
+          if(xp1!=-1 && xn1!=-1 && xp2!=-1 && xn2!=-1) {
+            pn1+=xp1;
+            nn1+=xn1;
+            pn2+=xp2;
+            nn2+=xn2;
+          }
+        }
+      }
+
+      double val;
+      double spn1=((double)pn1)*tag_weight;
+      double snn1=((double)nn1)*tag_weight;
+      double spn2=((double)pn2)*tag_weight;
+      double snn2=((double)nn2)*tag_weight;
+      if(round_up) {
+        if(pn1>0 && spn1<1) { spn1=1.0; }
+        //if(pn2>0 && spn2<1) { spn2=1.0; }
+        //if(nn1>0 && snn1<1) { snn1=1.0; }
+        if(nn2>0 && snn2<1) { snn2=1.0; }
+      }
+
+      if(direct_count) {
+        val=spn1+snn2;
+        if(round_up && val<1) {
+          val=1.0;
+        }
+        if(bg_subtract) {
+          val-=((double) (bg_pn1+bg_nn2))*bg_weight;
+        }
+      } else {
+        if(bg_subtract) {
+          spn1-=((double)bg_pn1)*bg_weight;
+          snn1-=((double)bg_nn1)*bg_weight;
+          spn2-=((double)bg_pn2)*bg_weight;
+          snn2-=((double)bg_nn2)*bg_weight;
+
+          if(spn2<0) spn2=0;
+          if(snn1<0) snn1=0;
+
+          if(spn1>0 && snn2>0) {
+            val=(2.0*sqrt(spn1*snn2)-(spn2+snn1+1.0));
+          } else {
+            val=-(spn2+snn1+1.0);
+          }
+        } else {
+          val=2.0*sqrt(spn1*snn2)-(spn2+snn1+tag_weight);
+        }
+      }
+      //double val=sqrt(pn1*nn2);
+      //if(pn2>nn1) { val-=pn2; } else { val-=pn1; }
+#ifdef DEBUG
+      Rprintf("update: i=%d pn1=%d pn2=%d nn1=%d nn2=%d val=%f\n",i,pn1,pn2,nn1,nn2,val);
+#endif
+
+      if(return_peaks) {
+        // determine if previous position was a peak
+        if(ppv>min_peak_val && ppv>val && ppv>pppv) {
+          if(lpp>0 && (i-lpp+1)>min_peak_dist) {
+            // record previous peak position
+            ppos.push_back(lpp);
+            pval.push_back(lpv);
+#ifdef DEBUG
+            Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp));
+#endif
+            // a plateau of equal values is represented by its middle
+            // (integer division, the fraction is dropped)
+            if(ppl!=-1 && ppl!=i-1) {
+              lpp=(int) round((ppl+i-1)/2);
+            } else {
+              lpp=i-1;
+            }
+            lpv=ppv;
+#ifdef DEBUG
+            Rprintf("updated peak to x=%d y=%f\n",lpp,lpv);
+#endif
+          } else {
+            // too close to the pending peak: keep the higher of the two
+            if(ppv>lpv) {
+              // update last peak positions
+#ifdef DEBUG
+              Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv);
+#endif
+              if(ppl!=-1 && ppl!=i-1) {
+                lpp=(int) round((ppl+i-1)/2);
+              } else {
+                lpp=i-1;
+              }
+              lpv=ppv;
+            }
+          }
+        }
+
+        // update previous values (runs of equal values count as one,
+        // ppl keeps the position where the run started)
+        if(val!=ppv) {
+          pppv=ppv; ppv=val; ppl=i;
+        }
+      } else {
+        d_nv[i]=val;
+      }
+    }
+
+    if(return_peaks) {
+      // record last position
+      if(lpp>0) {
+#ifdef DEBUG
+        Rprintf("recording last peak x=%d y=%f\n",lpp,lpv);
+#endif
+        ppos.push_back(lpp);
+        pval.push_back(lpv);
+      }
+
+      SEXP rpp_R,rpv_R;
+      PROTECT(rpp_R=allocVector(INTSXP,ppos.size()));
+      PROTECT(rpv_R=allocVector(REALSXP,ppos.size()));
+      int* rpp=INTEGER(rpp_R);
+      double* rpv=REAL(rpv_R);
+
+      for(int i=0;i<ppos.size();i++) {
+        rpp[i]=ppos[i];
+        rpv[i]=pval[i];
+      }
+
+      SEXP ans_R, names_R;
+      PROTECT(names_R = allocVector(STRSXP, 2));
+      SET_STRING_ELT(names_R, 0, mkChar("x"));
+      SET_STRING_ELT(names_R, 1, mkChar("v"));
+
+      PROTECT(ans_R = allocVector(VECSXP, 2));
+      SET_VECTOR_ELT(ans_R, 0, rpp_R);
+      SET_VECTOR_ELT(ans_R, 1, rpv_R);
+      setAttrib(ans_R, R_NamesSymbol, names_R);
+
+      UNPROTECT(4);
+      return(ans_R);
+    } else {
+      UNPROTECT(1);
+      return(nv);
+    }
+
+  }
+
+
+}
+
+