comparison spp/man/find.binding.positions.Rd @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
comparison
equal deleted inserted replaced
14:918fecc1e7bb 15:e689b83b0257
1 \name{find.binding.positions}
2 \alias{find.binding.positions}
3 %- Also NEED an '\alias' for EACH other topic documented here.
4 \title{ Determine significant point protein binding positions (peaks) }
5 \description{
6 Given the signal and optional control (input) data, determine location of the
7 statistically significant point binding positions. If the control data
8 is not provided, the statistical significance can be assessed based on
9 tag randomization. The method also provides options for masking
10 regions exhibiting strong signals within the control data.
11 }
12 \usage{
13 find.binding.positions(signal.data, e.value = NULL, fdr = NULL, masked.data = NULL, control.data = NULL, min.dist = 200, window.size = 4e+07, cluster = NULL, debug = T, n.randomizations = 3, shuffle.window = 1, min.thr = 0, topN = NULL, tag.count.whs = 100, enrichment.z = 2, method = tag.wtd, tec.filter = T, tec.window.size = 10000, tec.masking.window.size=tec.window.size, tec.z = 5, tec.poisson.z=5,tec.poisson.ratio=5, n.control.samples = 1, enrichment.background.scales = c(1, 5, 10), background.density.scaling = F, use.randomized.controls = F, ...)
14 }
15 %- maybe also 'usage' for other objects documented here.
16 \arguments{
17 ~~ tag data ~~
18 \item{signal.data}{ signal tag vector list }
19 \item{control.data}{ optional control (input) tag vector list }
20
21 ~~ position stringency criteria ~~
22 \item{e.value}{ E-value defining the desired statistical significance
23 of binding positions. }
24 \item{fdr}{ FDR defining statistical significance of binding positions }
25 \item{topN}{ instead of determining statistical significance
26 thresholds, return the specified number of highest-scoring
27 positions}
28
29 ~~ other params ~~
30 \item{whs}{ window half-size that should be used for binding
31 detection (e.g. determined from cross-correlation profiles)}
32 \item{masked.data}{ optional set of coordinates that should be masked
33 (e.g. known non-unique regions) }
34 \item{min.dist}{ minimal distance that must separate detected binding
35 positions. In case multiple binding positions are detected within
36 such distance, the position with the highest score is returned. }
37 \item{window.size}{ size of the window used to segment the chromosome
38 during calculations to reduce memory usage. }
39 \item{cluster}{ optional \code{snow} cluster to parallelize the
40 processing on }
41 \item{min.thr}{ minimal score requirement for a peak }
42 \item{background.density.scaling}{ If TRUE, regions of significant tag
43 enrichment will be masked out when calculating size ratio of the
44 signal to control datasets (to estimate ratio of the background tag
45 density). If FALSE, the dataset ratio will be equal to the ratio of
46 the number of tags in each dataset.}
47
48 ~~ randomized controls ~~
49 \item{n.randomizations}{ number of tag randomizations that should be
50 performed (when the control data is not provided) }
51 \item{use.randomized.controls}{ Use randomized tag control, even if
52 \code{control.data} is supplied. }
53 \item{shuffle.window}{ during tag randomizations, tags will be split
54 into groups of \code{shuffle.window} and will be maintained
55 together throughout the randomization. }
56
57 ~~ fold-enrichment confidence intervals ~~
58 \item{tag.count.whs}{ half-size of a window used to assess fold
59 enrichment of a binding position}
60 \item{enrichment.z}{ Z-score used to define the significance level of
61 the fold-enrichment confidence intervals }
62 \item{enrichment.background.scales}{ In estimating the peak
63 fold-enrichment confidence intervals, the background tag density is
64 estimated based on windows with half-sizes of
65 \code{2*tag.count.whs*enrichment.background.scales}. }
66 \item{method}{ either \code{tag.wtd} for WTD method, or
67 \code{tag.lwcc} for MTC method}
68 \item{mle.filter}{ If turned on, will exclude predicted positions
69 whose MLE enrichment ratio (for any of the background scales) is
70 below a specified min.mle.threshold }
71 \item{min.mle.threshold}{ MLE enrichment ratio threshold that each
72 predicted position must exceed if mle.filter is turned on. }
73
74 ~~ masking regions of significant control enrichment ~~
75 \item{tec.filter}{ Whether to mask out the regions exhibiting
76 significant enrichment in the control data in doing other
77 calculations. The regions are identified using Poisson statistics
78 within sliding windows, either relative to the scaled signal (tec.z), or
79 relative to randomly-distributed expectation (tec.poisson.z).}
80 \item{tec.window.size}{ size of the window used to determine
81 significantly enriched control regions }
82 \item{tec.masking.window.size}{ size of the window used to mask
83 the area around significantly enriched control regions }
84 \item{tec.z}{ Z-score defining statistical stringency by which a given
85 window is determined to be significantly higher in the input than in
86 the signal, and masked if that is the case.}
87 \item{tec.poisson.z}{ Z-score defining statistical stringency by which a given
88 window is determined to be significantly higher than the
89 tec.poisson.ratio above the expected uniform input background. }
90 \item{tec.poisson.ratio}{ Fold ratio by which input must exceed the
91 level expected from the uniform distribution. }
92
93
94
95
96 }
97 \value{
98 \item{npl}{A per-chromosome list containing data frames describing
99 determined binding positions. Column description:
100 \item{x}{ position }
101 \item{y}{ score }
102 \item{evalue}{ E-value }
103 \item{fdr}{ FDR. For peaks higher than the maximum control peak,
104 the highest dataset FDR is reported }
105 \item{enr}{ lower bound of the fold-enrichment ratio confidence
106 interval. This is the estimate determined using scale of
107 1. Estimates corresponding to higher scales are returned in other enr columns
108 with scale appearing in the name.}
109 \item{enr.mle}{ enrichment ratio maximum likelihood estimate }
110 }
111 \item{thr}{ info on the chosen statistical threshold of the peak scores}
112 }
113
114 \examples{
115 # find binding positions using WTD method, 200bp half-window size,
116 # control data, 1\% FDR
117 bp <-
118 find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.wtd,whs=200);
119
120 # find binding positions using MTC method, using 5 tag randomizations,
121 # keeping pairs of tag positions together (shuffle.window=2)
122 bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.lwcc,whs=200,use.randomized.controls=T,n.randomizations=5,shuffle.window=2)
123
124 # print out the number of determined positions
125 print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks"));
126
127
128 }