Mercurial > repos > zzhou > spp_phantompeak
comparison spp/man/find.binding.positions.Rd @ 15:e689b83b0257 draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:15:21 -0500 |
parents | ce08b0efa3fd |
children |
comparison
equal
deleted
inserted
replaced
14:918fecc1e7bb | 15:e689b83b0257 |
---|---|
1 \name{find.binding.positions} | |
2 \alias{find.binding.positions} | |
3 %- Also NEED an '\alias' for EACH other topic documented here. | |
4 \title{ Determine significant point protein binding positions (peaks) } | |
5 \description{ | |
6 Given the signal and optional control (input) data, determine location of the | |
7 statistically significant point binding positions. If the control data | |
8 is not provided, the statistical significance can be assessed based on | |
9 tag randomization. The method also provides options for masking | |
10 regions exhibiting strong signals within the control data. | |
11 } | |
12 \usage{ | |
13 find.binding.positions(signal.data, e.value = NULL, fdr = NULL, masked.data = NULL, control.data = NULL, min.dist = 200, window.size = 4e+07, cluster = NULL, debug = T, n.randomizations = 3, shuffle.window = 1, min.thr = 0, topN = NULL, tag.count.whs = 100, enrichment.z = 2, method = tag.wtd, tec.filter = T, tec.window.size = 10000, tec.masking.window.size=tec.window.size, tec.z = 5, tec.poisson.z=5,tec.poisson.ratio=5, n.control.samples = 1, enrichment.background.scales = c(1, 5, 10), background.density.scaling = F, use.randomized.controls = F, ...) | |
14 } | |
15 %- maybe also 'usage' for other objects documented here. | |
16 \arguments{ | |
17 ~~ tag data ~~ | |
18 \item{signal.data}{ signal tag vector list } | |
19 \item{control.data}{ optional control (input) tag vector list } | |
20 | |
21 ~~ position stringency criteria ~~ | |
22 \item{e.value}{ E-value defining the desired statistical significance | |
23 of binding positions. } | |
24 \item{fdr}{ FDR defining statistical significance of binding positions } | |
25 \item{topN}{ instead of determining statistical significance | |
26 thresholds, return the specified number of highest-scoring | |
27 positions} | |
28 | |
29 ~~ other params ~~ | |
30 \item{whs}{ window half-size that should be used for binding | |
31 detection (e.g. determined from cross-correlation profiles)} | |
32 \item{masked.data}{ optional set of coordinates that should be masked | |
33 (e.g. known non-unique regions) } | |
34 \item{min.dist}{ minimal distance that must separate detected binding | |
35 positions. In case multiple binding positions are detected within | |
36 such distance, the position with the highest score is returned. } | |
37 \item{window.size}{ size of the window used to segment the chromosome | |
38 during calculations to reduce memory usage. } | |
39 \item{cluster}{ optional \code{snow} cluster to parallelize the | |
40 processing on } | |
41 \item{min.thr}{ minimal score requirement for a peak } | |
42 \item{background.density.scaling}{ If TRUE, regions of significant tag | |
43 enrichment will be masked out when calculating size ratio of the | |
44 signal to control datasets (to estimate ratio of the background tag | |
45 density). If FALSE, the dataset ratio will be equal to the ratio of | |
46 the number of tags in each dataset.} | |
47 | |
48 ~~ randomized controls ~~ | |
49 \item{n.randomizations}{ number of tag randomizations that should be | |
50 performed (when the control data is not provided) } | |
51 \item{use.randomized.controls}{ Use randomized tag control, even if | |
52 \code{control.data} is supplied. } | |
53 \item{shuffle.window}{ during tag randomizations, tags will be split | |
54 into groups of \code{shuffle.window} and will be maintained | |
55 together throughout the randomization. } | |
56 | |
57 ~~ fold-enrichment confidence intervals ~~ | |
58 \item{tag.count.whs}{ half-size of a window used to assess fold | |
59 enrichment of a binding position} | |
60 \item{enrichment.z}{ Z-score used to define the significance level of | |
61 the fold-enrichment confidence intervals } | |
62 \item{enrichment.background.scales}{ In estimating the peak | |
63 fold-enrichment confidence intervals, the background tag density is | |
64 estimated based on windows with half-sizes of | |
65 \code{2*tag.count.whs*enrichment.background.scales}. } | |
66 \item{method}{ either \code{tag.wtd} for WTD method, or | |
67 \code{tag.lwcc} for MTC method} | |
68 \item{mle.filter}{ If turned on, will exclude predicted positions | |
69 whose MLE enrichment ratio (for any of the background scales) is | |
70 below a specified min.mle.threshold } | |
71 \item{min.mle.threshold}{ MLE enrichment ratio threshold that each | |
72 predicted position must exceed if mle.filter is turned on. } | |
73 | |
74 ~~ masking regions of significant control enrichment ~~ | |
75 \item{tec.filter}{ Whether to mask out the regions exhibiting | |
76 significant enrichment in the control data in doing other | |
77 calculations. The regions are identified using Poisson statistics | |
78 within sliding windows, either relative to the scaled signal (tec.z), or | |
79 relative to randomly-distributed expectation (tec.poisson.z).} | |
80 \item{tec.window.size}{ size of the window used to determine | |
81 significantly enriched control regions } | |
82 \item{tec.masking.window.size}{ size of the window used to mask | |
83 the area around significantly enriched control regions } | |
84 \item{tec.z}{ Z-score defining statistical stringency by which a given | |
85 window is determined to be significantly higher in the input than in | |
86 the signal, and masked if that is the case.} | |
87 \item{tec.poisson.z}{ Z-score defining statistical stringency by which a given | |
88 window is determined to be significantly higher than the | |
89 tec.poisson.ratio above the expected uniform input background. } | |
90 \item{tec.poisson.ratio}{ Fold ratio by which input must exceed the | |
91 level expected from the uniform distribution. } | |
92 | |
93 | |
94 | |
95 | |
96 } | |
97 \value{ | |
98 \item{npl}{A per-chromosome list containing data frames describing | |
99 determined binding positions. Column description: | |
100 \item{x}{ position } | |
101 \item{y}{ score } | |
102 \item{evalue}{ E-value } | |
103 \item{fdr}{ FDR. For peaks higher than the maximum control peak, | |
104 the highest dataset FDR is reported } | |
105 \item{enr}{ lower bound of the fold-enrichment ratio confidence | |
106 interval. This is the estimate determined using a scale of | |
107 1. Estimates corresponding to higher scales are returned in other enr columns | |
108 with scale appearing in the name.} | |
109 \item{enr.mle}{ enrichment ratio maximum likelihood estimate } | |
110 } | |
111 \item{thr}{ info on the chosen statistical threshold of the peak scores} | |
112 } | |
113 | |
114 \examples{ | |
115 # find binding positions using WTD method, 200bp half-window size, | |
116 # control data, 1\% FDR | |
117 bp <- | |
118 find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.wtd,whs=200); | |
119 | |
120 # find binding positions using MTC method, using 5 tag randomizations, | |
121 # keeping pairs of tag positions together (shuffle.window=2) | |
122 bp <- find.binding.positions(signal.data=chip.data,control.data=input.data,fdr=0.01,method=tag.lwcc,whs=200,use.randomized.controls=T,n.randomizations=5,shuffle.window=2) | |
123 | |
124 # print out the number of determined positions | |
125 print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks")); | |
126 | |
127 | |
128 } |