spp_phantompeak: spp/man/spp-package.Rd annotate

annotate spp/man/spp-package.Rd @ 6:ce08b0efa3fd draft

Uploaded

author	zzhou
date	Tue, 27 Nov 2012 16:11:40 -0500
parents
children

rev	line source
6 ce08b0efa3fd Uploaded zzhou parents: diff changeset	1 \name{spp-package}
ce08b0efa3fd Uploaded zzhou parents: diff changeset	2 \alias{spp-package}
ce08b0efa3fd Uploaded zzhou parents: diff changeset	3 \alias{spp}
ce08b0efa3fd Uploaded zzhou parents: diff changeset	4 \docType{package}
ce08b0efa3fd Uploaded zzhou parents: diff changeset	5 \title{
ce08b0efa3fd Uploaded zzhou parents: diff changeset	6 ChIP-seq (Solexa) Processing Pipeline
ce08b0efa3fd Uploaded zzhou parents: diff changeset	7 }
ce08b0efa3fd Uploaded zzhou parents: diff changeset	8 \description{
ce08b0efa3fd Uploaded zzhou parents: diff changeset	9 A set of routines for reading short sequence alignments, calculating tag
ce08b0efa3fd Uploaded zzhou parents: diff changeset	10 density, estimates of statistically significant enrichment/depletion
ce08b0efa3fd Uploaded zzhou parents: diff changeset	11 along the chromosome, identifying point binding positions (peaks), and
ce08b0efa3fd Uploaded zzhou parents: diff changeset	12 characterizing saturation properties related to sequencing depth.
ce08b0efa3fd Uploaded zzhou parents: diff changeset	13 }
ce08b0efa3fd Uploaded zzhou parents: diff changeset	14 \details{
ce08b0efa3fd Uploaded zzhou parents: diff changeset	15 \tabular{ll}{
ce08b0efa3fd Uploaded zzhou parents: diff changeset	16 Package: \tab spp\cr
ce08b0efa3fd Uploaded zzhou parents: diff changeset	17 Type: \tab Package\cr
ce08b0efa3fd Uploaded zzhou parents: diff changeset	18 Version: \tab 1.8\cr
ce08b0efa3fd Uploaded zzhou parents: diff changeset	19 Date: \tab 2008-11-14\cr
ce08b0efa3fd Uploaded zzhou parents: diff changeset	20 License: \tab What license is it under?\cr
ce08b0efa3fd Uploaded zzhou parents: diff changeset	21 LazyLoad: \tab yes\cr
ce08b0efa3fd Uploaded zzhou parents: diff changeset	22 }
ce08b0efa3fd Uploaded zzhou parents: diff changeset	23 See example below for typical processing sequence.y
ce08b0efa3fd Uploaded zzhou parents: diff changeset	24 }
ce08b0efa3fd Uploaded zzhou parents: diff changeset	25 \author{Peter Kharchenko <peter.kharchenko@post.harvard.edu>}
ce08b0efa3fd Uploaded zzhou parents: diff changeset	26 \references{
ce08b0efa3fd Uploaded zzhou parents: diff changeset	27 Kharchenko P., Tolstorukov M., Park P. "Design and analysis of ChIP-seq
ce08b0efa3fd Uploaded zzhou parents: diff changeset	28 experiments for DNA-binding proteins." Nature Biotech. doi:10.1038/nbt.1508
ce08b0efa3fd Uploaded zzhou parents: diff changeset	29 }
ce08b0efa3fd Uploaded zzhou parents: diff changeset	30
ce08b0efa3fd Uploaded zzhou parents: diff changeset	31 \examples{
ce08b0efa3fd Uploaded zzhou parents: diff changeset	32
ce08b0efa3fd Uploaded zzhou parents: diff changeset	33 # load the library
ce08b0efa3fd Uploaded zzhou parents: diff changeset	34 library(spp);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	35
ce08b0efa3fd Uploaded zzhou parents: diff changeset	36 ## The following section shows how to initialize a cluster of 8 nodes for parallel processing
ce08b0efa3fd Uploaded zzhou parents: diff changeset	37 ## To enable parallel processing, uncomment the next three lines, and comment out "cluster<-NULL";
ce08b0efa3fd Uploaded zzhou parents: diff changeset	38 ## see "snow" package manual for details.
ce08b0efa3fd Uploaded zzhou parents: diff changeset	39 #library(snow)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	40 #cluster <- makeCluster(2);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	41 #invisible(clusterCall(cluster,source,"routines.r"));
ce08b0efa3fd Uploaded zzhou parents: diff changeset	42 cluster <- NULL;
ce08b0efa3fd Uploaded zzhou parents: diff changeset	43
ce08b0efa3fd Uploaded zzhou parents: diff changeset	44
ce08b0efa3fd Uploaded zzhou parents: diff changeset	45
ce08b0efa3fd Uploaded zzhou parents: diff changeset	46 # read in tag alignments
ce08b0efa3fd Uploaded zzhou parents: diff changeset	47 chip.data <- read.eland.tags("chip.eland.alignment");
ce08b0efa3fd Uploaded zzhou parents: diff changeset	48 input.data <- read.eland.tags("input.eland.alignment");
ce08b0efa3fd Uploaded zzhou parents: diff changeset	49
ce08b0efa3fd Uploaded zzhou parents: diff changeset	50 # get binding info from cross-correlation profile
ce08b0efa3fd Uploaded zzhou parents: diff changeset	51 # srange gives the possible range for the size of the protected region;
ce08b0efa3fd Uploaded zzhou parents: diff changeset	52 # srange should be higher than tag length; making the upper boundary too high will increase calculation time
ce08b0efa3fd Uploaded zzhou parents: diff changeset	53 #
ce08b0efa3fd Uploaded zzhou parents: diff changeset	54 # bin - bin tags within the specified number of basepairs to speed up calculation;
ce08b0efa3fd Uploaded zzhou parents: diff changeset	55 # increasing bin size decreases the accuracy of the determined parameters
ce08b0efa3fd Uploaded zzhou parents: diff changeset	56 binding.characteristics <- get.binding.characteristics(chip.data,srange=c(50,500),bin=5,cluster=cluster);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	57
ce08b0efa3fd Uploaded zzhou parents: diff changeset	58
ce08b0efa3fd Uploaded zzhou parents: diff changeset	59 # plot cross-correlation profile
ce08b0efa3fd Uploaded zzhou parents: diff changeset	60 pdf(file="example.crosscorrelation.pdf",width=5,height=5)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	61 par(mar = c(3.5,3.5,1.0,0.5), mgp = c(2,0.65,0), cex = 0.8);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	62 plot(binding.characteristics$cross.correlation,type='l',xlab="strand shift",ylab="cross-correlation");
ce08b0efa3fd Uploaded zzhou parents: diff changeset	63 abline(v=binding.characteristics$peak$x,lty=2,col=2)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	64 dev.off();
ce08b0efa3fd Uploaded zzhou parents: diff changeset	65
ce08b0efa3fd Uploaded zzhou parents: diff changeset	66 # select informative tags based on the binding characteristics
ce08b0efa3fd Uploaded zzhou parents: diff changeset	67 chip.data <- select.informative.tags(chip.data,binding.characteristics);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	68 input.data <- select.informative.tags(input.data,binding.characteristics);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	69
ce08b0efa3fd Uploaded zzhou parents: diff changeset	70 # restrict or remove positions with anomalous number of tags relative
ce08b0efa3fd Uploaded zzhou parents: diff changeset	71 # to the local density
ce08b0efa3fd Uploaded zzhou parents: diff changeset	72 chip.data <- remove.local.tag.anomalies(chip.data);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	73 input.data <- remove.local.tag.anomalies(input.data);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	74
ce08b0efa3fd Uploaded zzhou parents: diff changeset	75
ce08b0efa3fd Uploaded zzhou parents: diff changeset	76 # output smoothed tag density (subtracting re-scaled input) into a WIG file
ce08b0efa3fd Uploaded zzhou parents: diff changeset	77 # note that the tags are shifted by half of the peak separation distance
ce08b0efa3fd Uploaded zzhou parents: diff changeset	78 smoothed.density <- get.smoothed.tag.density(chip.data,control.tags=input.data,bandwidth=200,step=100,tag.shift=round(binding.characteristics$peak$x/2));
ce08b0efa3fd Uploaded zzhou parents: diff changeset	79 writewig(smoothed.density,"example.density.wig","Example smoothed, background-subtracted tag density");
ce08b0efa3fd Uploaded zzhou parents: diff changeset	80 rm(smoothed.density);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	81
ce08b0efa3fd Uploaded zzhou parents: diff changeset	82 # output conservative enrichment estimates
ce08b0efa3fd Uploaded zzhou parents: diff changeset	83 # alpha specifies significance level at which confidence intervals will be estimated
ce08b0efa3fd Uploaded zzhou parents: diff changeset	84 enrichment.estimates <- get.conservative.fold.enrichment.profile(chip.data,input.data,fws=2*binding.characteristics$whs,step=100,alpha=0.01);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	85 writewig(enrichment.estimates,"example.enrichment.estimates.wig","Example conservative fold-enrichment/depletion estimates shown on log2 scale");
ce08b0efa3fd Uploaded zzhou parents: diff changeset	86 rm(enrichment.estimates);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	87
ce08b0efa3fd Uploaded zzhou parents: diff changeset	88
ce08b0efa3fd Uploaded zzhou parents: diff changeset	89 # binding detection parameters
ce08b0efa3fd Uploaded zzhou parents: diff changeset	90 # desired FDR. Alternatively, an E-value can be supplied to the method calls below instead of the fdr parameter
ce08b0efa3fd Uploaded zzhou parents: diff changeset	91 fdr <- 1e-2;
ce08b0efa3fd Uploaded zzhou parents: diff changeset	92 # the binding.characteristics contains the optimized half-size for binding detection window
ce08b0efa3fd Uploaded zzhou parents: diff changeset	93 detection.window.halfsize <- binding.characteristics$whs;
ce08b0efa3fd Uploaded zzhou parents: diff changeset	94
ce08b0efa3fd Uploaded zzhou parents: diff changeset	95 # determine binding positions using wtd method
ce08b0efa3fd Uploaded zzhou parents: diff changeset	96 bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize,cluster=cluster)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	97
ce08b0efa3fd Uploaded zzhou parents: diff changeset	98 # alternatively determined binding positions using lwcc method (note: this takes longer than wtd)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	99 # bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=fdr,method=tag.lwcc,whs=detection.window.halfsize,cluster=cluster)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	100
ce08b0efa3fd Uploaded zzhou parents: diff changeset	101 print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks"));
ce08b0efa3fd Uploaded zzhou parents: diff changeset	102
ce08b0efa3fd Uploaded zzhou parents: diff changeset	103 # output detected binding positions
ce08b0efa3fd Uploaded zzhou parents: diff changeset	104 output.binding.results(bp,"example.binding.positions.txt");
ce08b0efa3fd Uploaded zzhou parents: diff changeset	105
ce08b0efa3fd Uploaded zzhou parents: diff changeset	106
ce08b0efa3fd Uploaded zzhou parents: diff changeset	107 # -------------------------------------------------------------------------------------------
ce08b0efa3fd Uploaded zzhou parents: diff changeset	108 # the set of commands in the following section illustrates methods for saturation analysis
ce08b0efa3fd Uploaded zzhou parents: diff changeset	109 # these are separated from the previous section, since they are highly CPU intensive
ce08b0efa3fd Uploaded zzhou parents: diff changeset	110 # -------------------------------------------------------------------------------------------
ce08b0efa3fd Uploaded zzhou parents: diff changeset	111
ce08b0efa3fd Uploaded zzhou parents: diff changeset	112 # determine MSER
ce08b0efa3fd Uploaded zzhou parents: diff changeset	113 # note: this will take approximately 10-15x the amount of time the initial binding detection did
ce08b0efa3fd Uploaded zzhou parents: diff changeset	114 # The saturation criteria here is 0.99 consistency in the set of binding positions when adding 1e5 tags.
ce08b0efa3fd Uploaded zzhou parents: diff changeset	115 # To ensure convergence the number of subsampled chains (n.chains) should be higher (80)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	116 mser <- get.mser(chip.data,input.data,step.size=1e5,test.agreement=0.99,n.chains=8,cluster=cluster,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	117
ce08b0efa3fd Uploaded zzhou parents: diff changeset	118 print(paste("MSER at a current depth is",mser));
ce08b0efa3fd Uploaded zzhou parents: diff changeset	119
ce08b0efa3fd Uploaded zzhou parents: diff changeset	120 # note: an MSER value of 1 or very near one implies that the set of detected binding positions satisfies saturation criteria without
ce08b0efa3fd Uploaded zzhou parents: diff changeset	121 # additional selection by fold-enrichment ratios. In other words, the dataset has reached saturation in a traditional sense (absolute saturation).
ce08b0efa3fd Uploaded zzhou parents: diff changeset	122
ce08b0efa3fd Uploaded zzhou parents: diff changeset	123 # interpolate MSER dependency on tag count
ce08b0efa3fd Uploaded zzhou parents: diff changeset	124 # note: this requires considerably more calculations than the previous steps (~ 3x more than the first MSER calculation)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	125 # Here we interpolate MSER dependency to determine a point at which MSER of 2 is reached
ce08b0efa3fd Uploaded zzhou parents: diff changeset	126 # The interpolation will be based on the difference in MSER at the current depth, and a depth at 5e5 fewer tags (n.steps=6);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	127 # evaluation of the intermediate points is omitted here to speed up the calculation (excluded.steps parameter)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	128 # A total of 7 chains is used here to speed up calculation, whereas a higher number of chains (50) would give good convergence
ce08b0efa3fd Uploaded zzhou parents: diff changeset	129 msers <- get.mser.interpolation(chip.data,input.data,step.size=1e5,test.agreement=0.99, target.fold.enrichment=2, n.chains=7,n.steps=6,excluded.steps=c(2:4),cluster=cluster,fdr=fdr,method=tag.wtd,whs=detection.window.halfsize)
ce08b0efa3fd Uploaded zzhou parents: diff changeset	130
ce08b0efa3fd Uploaded zzhou parents: diff changeset	131 print(paste("predicted sequencing depth =",round(unlist(lapply(msers,function(x) x$prediction))/1e6,5)," million tags"))
ce08b0efa3fd Uploaded zzhou parents: diff changeset	132
ce08b0efa3fd Uploaded zzhou parents: diff changeset	133
ce08b0efa3fd Uploaded zzhou parents: diff changeset	134 # note: the interpolation will return NA prediction if the dataset has reached absolute saturation at the current depth.
ce08b0efa3fd Uploaded zzhou parents: diff changeset	135 # note: use return.chains=T to also calculated random chains (returned under msers$chains field) - these can be passed back as
ce08b0efa3fd Uploaded zzhou parents: diff changeset	136 # "get.mser.interpolation( ..., chains=msers$chains)" to calculate predictions for another target.fold.enrichment value
ce08b0efa3fd Uploaded zzhou parents: diff changeset	137 # without having to recalculate the random chain predictions.
ce08b0efa3fd Uploaded zzhou parents: diff changeset	138
ce08b0efa3fd Uploaded zzhou parents: diff changeset	139 ## stop cluster if it was initialized
ce08b0efa3fd Uploaded zzhou parents: diff changeset	140 #stopCluster(cluster);
ce08b0efa3fd Uploaded zzhou parents: diff changeset	141
ce08b0efa3fd Uploaded zzhou parents: diff changeset	142
ce08b0efa3fd Uploaded zzhou parents: diff changeset	143
ce08b0efa3fd Uploaded zzhou parents: diff changeset	144 }

Mercurial > repos > zzhou > spp_phantompeak

annotate spp/man/spp-package.Rd @ 6:ce08b0efa3fd draft