6
|
1 #include <vector>
|
|
2 #include <string.h>
|
|
3 #include <iostream>
|
|
4 #include <string>
|
|
5 #include <set>
|
|
6
|
|
7 extern "C" {
|
|
8 #include "R.h"
|
|
9 #include "Rmath.h"
|
|
10 #include "Rinternals.h"
|
|
11 #include "Rdefines.h"
|
|
12 }
|
|
13
|
|
14 using namespace std;
|
|
15 using namespace __gnu_cxx;
|
|
16
|
|
17 /**
|
|
18 * Calculate all local peaks
|
|
19 */
|
|
20
|
|
21 //#define DEBUG 1
|
|
22
|
|
23 extern "C" {
|
|
24 SEXP find_peaks(SEXP x_R,SEXP thr_R,SEXP max_span_R) {
|
|
25
|
|
26 #ifdef DEBUG
|
|
27 Rprintf("start\n");
|
|
28 #endif
|
|
29 double* x=REAL(x_R);
|
|
30 int nx=LENGTH(x_R);
|
|
31 int max_span=*INTEGER(max_span_R);
|
|
32 double thr=REAL(thr_R)[0];
|
|
33 #ifdef DEBUG
|
|
34 Rprintf("n=%d; thr=%f; max_span=%d\n",nx,thr,max_span);
|
|
35 #endif
|
|
36
|
|
37 vector<int> pos;
|
|
38
|
|
39 double pv=x[0];
|
|
40 double ppv=0; // previous peak value
|
|
41 int ppp=-max_span-1; // previous peak position
|
|
42
|
|
43 for(int i=1;i<(nx-1);i++) {
|
|
44 if(x[i]>pv && x[i]>=thr && x[i]>x[i+1]) {
|
|
45 if(max_span>2) {
|
|
46 //Rprintf("i=%d; ppp=%d\n",i,ppp);
|
|
47 if(i-ppp > max_span) {
|
|
48 if(ppp>=0) {
|
|
49 pos.push_back(ppp);
|
|
50 }
|
|
51 //Rprintf("recorded %d; now %d\n",ppp,i);
|
|
52 ppp=i; ppv=x[i];
|
|
53 } else {
|
|
54 if(x[i]>ppv) {
|
|
55 //Rprintf("reset from %d to %d\n",ppp,i);
|
|
56 ppp=i; ppv=x[i];
|
|
57 }
|
|
58 }
|
|
59 } else {
|
|
60 pos.push_back(i);
|
|
61 }
|
|
62 }
|
|
63 if(x[i]!=x[i+1]) { pv=x[i]; }
|
|
64 }
|
|
65
|
|
66 // add remaining peak
|
|
67 if(max_span>2 && ppp>=0) {
|
|
68 pos.push_back(ppp);
|
|
69 }
|
|
70
|
|
71 SEXP nv;
|
|
72 PROTECT(nv=allocVector(INTSXP,pos.size()));
|
|
73 int* i_nv=INTEGER(nv);
|
|
74 int i=0;
|
|
75 for(vector<int> ::const_iterator pi=pos.begin();pi!=pos.end();++pi) {
|
|
76 i_nv[i++]=1+(*pi);
|
|
77 }
|
|
78
|
|
79 UNPROTECT(1);
|
|
80 return(nv);
|
|
81 }
|
|
82
|
|
83
|
|
84
|
|
85
|
|
86 /************************************************************************/
|
|
87 // given a data vector d (positive values) and a set of signed center coordinates pos,
|
|
88 // returns coordinates of data points relative to the centers
|
|
89 // size is the size of the region around the centers
|
|
90 // return: vector of relative coordinates (x) and indecies of centers relative the coordinate
|
|
91 // was calculated (i).
|
|
92 SEXP get_relative_coordinates(SEXP d_R,
|
|
93 SEXP pos_R,
|
|
94 SEXP size_R)
|
|
95 {
|
|
96 int *d, *pos;
|
|
97 int npos,nd,size;
|
|
98
|
|
99 d = INTEGER(d_R); pos = INTEGER(pos_R);
|
|
100 npos=LENGTH(pos_R); nd=LENGTH(d_R);
|
|
101 size = INTEGER(size_R)[0];
|
|
102 #ifdef DEBUG
|
|
103 Rprintf("|d|=%d, |c|=%d, size=%d\n",nd,npos,size);
|
|
104 #endif
|
|
105
|
|
106 vector<int> x; vector<int> xi;
|
|
107 int k=0; // current pos index
|
|
108
|
|
109 for(int i=0;i<nd;i++) {
|
|
110 // increment k until pos[k]+size>=d[i]
|
|
111 while((abs(pos[k])+size) < d[i]) { k++; if(k==npos) { break; };
|
|
112 #ifdef DEBUG
|
|
113 Rprintf("advancing k to %d\n",k);
|
|
114 #endif
|
|
115 }
|
|
116 if(k==npos) { break; };
|
|
117 // increment i until d[i]>=pos[k]-size
|
|
118 while((abs(pos[k])-size) > d[i]) { i++; if(i==nd) { break; }
|
|
119 #ifdef DEBUG
|
|
120 Rprintf("advancing i to %d\n",i);
|
|
121 #endif
|
|
122 }
|
|
123 if(i==nd) { break; }
|
|
124
|
|
125
|
|
126 int l=k;
|
|
127 while((l<npos) && ((abs(pos[l])-size) <= d[i])) { l++;
|
|
128 #ifdef DEBUG
|
|
129 Rprintf("advancing l to %d\n",l);
|
|
130 #endif
|
|
131 }
|
|
132 for(int j=k;j<l;j++) {
|
|
133 int pd=d[i]-abs(pos[j]);
|
|
134 if(abs(pd)<=size) {
|
|
135 // record
|
|
136 if(pos[j]>0) {
|
|
137 x.push_back(pd);
|
|
138 } else {
|
|
139 x.push_back(-1*pd);
|
|
140 }
|
|
141 xi.push_back(j);
|
|
142 #ifdef DEBUG
|
|
143 Rprintf("recorded i=%d, j=%d\n",i,j);
|
|
144 #endif
|
|
145 } else {
|
|
146 break;
|
|
147 }
|
|
148 }
|
|
149 }
|
|
150
|
|
151 SEXP xv_R,xiv_R;
|
|
152 PROTECT(xv_R=allocVector(INTSXP,x.size()));
|
|
153 PROTECT(xiv_R=allocVector(INTSXP,x.size()));
|
|
154 int* xv=INTEGER(xv_R);
|
|
155 int* xiv=INTEGER(xiv_R);
|
|
156
|
|
157 int i=0;
|
|
158 for(vector<int> ::const_iterator pi=x.begin();pi!=x.end();++pi) {
|
|
159 xv[i++]=*pi;
|
|
160 }
|
|
161 i=0;
|
|
162 for(vector<int> ::const_iterator pi=xi.begin();pi!=xi.end();++pi) {
|
|
163 xiv[i++]=1+(*pi);
|
|
164 }
|
|
165
|
|
166 SEXP ans_R, names_R;
|
|
167 PROTECT(names_R = allocVector(STRSXP, 2));
|
|
168 SET_STRING_ELT(names_R, 0, mkChar("x"));
|
|
169 SET_STRING_ELT(names_R, 1, mkChar("i"));
|
|
170
|
|
171 PROTECT(ans_R = allocVector(VECSXP, 2));
|
|
172 SET_VECTOR_ELT(ans_R, 0, xv_R);
|
|
173 SET_VECTOR_ELT(ans_R, 1, xiv_R);
|
|
174 setAttrib(ans_R, R_NamesSymbol, names_R);
|
|
175
|
|
176 UNPROTECT(4);
|
|
177 return(ans_R);
|
|
178 }
|
|
179
|
|
180
|
|
181 // determines a set of points within a set of fragments
|
|
182 // note: all vectors sorted in ascending order
|
|
183 // note: all vectors are integers
|
|
184 // x_R - vector of point positions
|
|
185 // se_R - vector of start and end positions
|
|
186 // fi_R - vector of signed fragment indecies
|
|
187 // return_list_R - whether a list of fragments should be returned for each point
|
|
188 // return_unique_R - whether points in multiple fragments should be omitted
|
|
189 SEXP points_within(SEXP x_R,SEXP se_R,SEXP fi_R,SEXP return_list_R,SEXP return_unique_R,SEXP return_point_counts_R) {
|
|
190 #ifdef DEBUG
|
|
191 Rprintf("start\n");
|
|
192 #endif
|
|
193 int* x=INTEGER(x_R);
|
|
194 int nx=LENGTH(x_R);
|
|
195 int* se=INTEGER(se_R);
|
|
196 int* fi=INTEGER(fi_R);
|
|
197 int nf=LENGTH(se_R);
|
|
198
|
|
199 int return_list=*(INTEGER(return_list_R));
|
|
200 int return_unique=*(INTEGER(return_unique_R));
|
|
201 int return_point_counts=*(INTEGER(return_point_counts_R));
|
|
202
|
|
203 #ifdef DEBUG
|
|
204 Rprintf("nf=%d; nx=%d, return_list=%d, return_unique=%d, return_point_counts=%d\n",nf/2,nx,return_list,return_unique,return_point_counts);
|
|
205 #endif
|
|
206 set<int> fset;
|
|
207
|
|
208
|
|
209 SEXP nv; int *i_nv;
|
|
210 int np=0;
|
|
211 if(return_point_counts) {
|
|
212 PROTECT(nv = allocVector(INTSXP, nf/2)); np++;
|
|
213 i_nv=INTEGER(nv);
|
|
214 for(int i=0;i<nf/2;i++) { i_nv[i]=0; }
|
|
215 } else if(return_list) {
|
|
216 PROTECT(nv = allocVector(VECSXP, nx)); np++;
|
|
217 } else {
|
|
218 PROTECT(nv=allocVector(INTSXP,nx)); np++;
|
|
219 i_nv=INTEGER(nv);
|
|
220 }
|
|
221
|
|
222 int j=0;
|
|
223
|
|
224 for(int i=0;i<nx;i++) {
|
|
225 // advance j
|
|
226 while(j<nf && se[j]<x[i]) {
|
|
227 int frag=fi[j];
|
|
228 if(frag>0) { // insert
|
|
229 fset.insert(frag);
|
|
230 #ifdef DEBUG
|
|
231 Rprintf("inserted frag %d, size=%d\n",frag,fset.size());
|
|
232 #endif
|
|
233 } else { // remove
|
|
234 fset.erase(-frag);
|
|
235 #ifdef DEBUG
|
|
236 Rprintf("removed frag %d, size=%d\n",-frag,fset.size());
|
|
237 #endif
|
|
238 }
|
|
239 j++;
|
|
240 }
|
|
241 #ifdef DEBUG
|
|
242 Rprintf("i=%d j=%d\n",i,j);
|
|
243 #endif
|
|
244 if(return_list) {
|
|
245 if(fset.empty() || (return_unique && fset.size()>1)) {
|
|
246 // assign null list?
|
|
247 } else {
|
|
248 SEXP fil_R;
|
|
249 PROTECT(fil_R=allocVector(INTSXP,fset.size())); np++;
|
|
250 int* fil=INTEGER(fil_R);
|
|
251 int k=0;
|
|
252 for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) {
|
|
253 fil[k]=*ki; k++;
|
|
254 }
|
|
255 SET_VECTOR_ELT(nv, i, fil_R);
|
|
256 UNPROTECT(1); np--;
|
|
257 }
|
|
258 } else {
|
|
259 if(return_point_counts) {
|
|
260 for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) {
|
|
261 i_nv[*ki-1]++;
|
|
262 }
|
|
263 } else {
|
|
264 if(fset.empty() || (return_unique && fset.size()>1)) {
|
|
265 i_nv[i]=-1;
|
|
266 } else {
|
|
267 i_nv[i]=*fset.begin();
|
|
268 }
|
|
269 }
|
|
270 }
|
|
271 }
|
|
272
|
|
273 UNPROTECT(np);
|
|
274 return(nv);
|
|
275 }
|
|
276
|
|
277
|
|
278 SEXP expuni_lr(SEXP x_R, // positions and their number (assumed sorted in ascending order)
|
|
279 SEXP mdist_R, // max distance at which points should be considered
|
|
280 SEXP lambda_R, // lambda value
|
|
281 SEXP spos_R, // starting position
|
|
282 SEXP epos_R, // ending position
|
|
283 SEXP step_R, // step size
|
|
284 SEXP return_peaks_R, // whether peak positions should be returned, or entire score vector
|
|
285 SEXP min_peak_lr_R // min peak height (lr)
|
|
286 )
|
|
287 {
|
|
288
|
|
289 #ifdef DEBUG
|
|
290 Rprintf("start\n");
|
|
291 #endif
|
|
292 int* x=INTEGER(x_R);
|
|
293 int nx=LENGTH(x_R);
|
|
294 int mdist=INTEGER(mdist_R)[0];
|
|
295 double lambda=*(REAL(lambda_R));
|
|
296
|
|
297 int return_peaks=*(INTEGER(return_peaks_R));
|
|
298 double min_peak=*(REAL(min_peak_lr_R));
|
|
299
|
|
300 int spos=*(INTEGER(spos_R));
|
|
301 int epos=*(INTEGER(epos_R));
|
|
302 int step=*(INTEGER(step_R));
|
|
303
|
|
304 int nsteps=(int) (epos-spos)/step;
|
|
305
|
|
306
|
|
307 #ifdef DEBUG
|
|
308 Rprintf("n=%d; lambda=%f; mdist=%d; spos=%d; epos=%d; step=%d; nsteps=%d\n",nx,lambda,mdist,spos,epos,step,nsteps);
|
|
309 #endif
|
|
310
|
|
311
|
|
312 SEXP nv;
|
|
313 double *d_nv;
|
|
314 if(!return_peaks) {
|
|
315 PROTECT(nv=allocVector(REALSXP,nsteps+1));
|
|
316 d_nv=REAL(nv);
|
|
317 }
|
|
318
|
|
319
|
|
320 int i=0; // current index of the first point being used in the calculations
|
|
321 int j=0; // current index of the last point being used in the calculations
|
|
322 int sx=0; // current sum of all positions
|
|
323 int n=0;
|
|
324
|
|
325 for(int k=0; k<=nsteps; k++) {
|
|
326 int cpos=spos+k*step;
|
|
327 // increase i until x[i]>=cpos-mdist; remove x from sx; decrement n;
|
|
328 while(i<nx && x[i]<(cpos-mdist)) {
|
|
329 n--; sx-=x[i]; i++;
|
|
330 //Rprintf("incremented i: i=%d; n=%d; sx=%d; cpos-mdist=%d; x[i-1]=%d\n",i,n,sx,cpos-mdist,x[i-1]);
|
|
331 }
|
|
332 //Rprintf("stable i: i=%d; n=%d; sx=%d; cpos-mdist=%d; x[i-1]=%d\n",i,n,sx,cpos-mdist,x[i-1]);
|
|
333
|
|
334 //if(i>j) { j=i; }
|
|
335
|
|
336 // increase j until x[j]>cpos
|
|
337 while(j<nx && x[j]<=cpos) {
|
|
338 n++; sx+=x[j]; j++;
|
|
339 //Rprintf("incremented j: j=%d; n=%d; sx=%d; cpos=%d; x[j-1]=%d\n",j,n,sx,cpos,x[j-1]);
|
|
340 }
|
|
341 //Rprintf("stable j: j=%d; n=%d; sx=%d; cpos=%d; x[j-1]=%d\n",j,n,sx,cpos,x[j]);
|
|
342
|
|
343 // calculate lr
|
|
344 d_nv[k]=((double)(1-n))*log(lambda)-lambda*((double)(n*(cpos+1)-sx));
|
|
345 //Rprintf("recorded lr[%d]=%f\n",k-1,d_nv[k-1]);
|
|
346 }
|
|
347 UNPROTECT(1);
|
|
348 return(nv);
|
|
349 }
|
|
350
|
|
351
|
|
352 SEXP allpdist(SEXP x_R,SEXP max_dist_R) {
|
|
353
|
|
354 #ifdef DEBUG
|
|
355 Rprintf("start\n");
|
|
356 #endif
|
|
357 double* x=REAL(x_R);
|
|
358 int nx=LENGTH(x_R);
|
|
359 double max_dist=*REAL(max_dist_R);
|
|
360 #ifdef DEBUG
|
|
361 Rprintf("n=%d; max_dist=%d\n",nx,max_dist);
|
|
362 #endif
|
|
363
|
|
364 vector<double> dist;
|
|
365
|
|
366 for(int i=0;i<nx;i++) {
|
|
367 for(int j=i+1;j<nx;j++) {
|
|
368
|
|
369 double d=x[j]-x[i];
|
|
370 #ifdef DEBUG
|
|
371 Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
|
|
372 #endif
|
|
373 if(d<=max_dist) {
|
|
374 dist.push_back(d);
|
|
375 } else {
|
|
376 break;
|
|
377 }
|
|
378 }
|
|
379 }
|
|
380
|
|
381 SEXP nv;
|
|
382 PROTECT(nv=allocVector(REALSXP,dist.size()));
|
|
383 double* i_nv=REAL(nv);
|
|
384 int i=0;
|
|
385 for(vector<double> ::const_iterator pi=dist.begin();pi!=dist.end();++pi) {
|
|
386 i_nv[i++]=*pi;
|
|
387 }
|
|
388
|
|
389 UNPROTECT(1);
|
|
390 return(nv);
|
|
391 }
|
|
392
|
|
393 // same as above, but for two different sets
|
|
394 SEXP allxpdist(SEXP x_R,SEXP y_R, SEXP max_dist_R) {
|
|
395
|
|
396 #ifdef DEBUG
|
|
397 Rprintf("start\n");
|
|
398 #endif
|
|
399 double* x=REAL(x_R);
|
|
400 double* y=REAL(y_R);
|
|
401 int nx=LENGTH(x_R);
|
|
402 int ny=LENGTH(y_R);
|
|
403 double max_dist=*REAL(max_dist_R);
|
|
404 #ifdef DEBUG
|
|
405 Rprintf("nx=%d; ny=%d; max_dist=%d\n",nx,ny,max_dist);
|
|
406 #endif
|
|
407
|
|
408 vector<double> dist;
|
|
409 int yi=0; // latest y start index
|
|
410
|
|
411 for(int i=0;i<nx;i++) {
|
|
412 // adjust yi so that yi>=x[i]-max_dist_R
|
|
413 while(y[yi]<(x[i]-max_dist) && yi<ny) { yi++; }
|
|
414 if(yi==ny) { break; }
|
|
415
|
|
416 for(int j=yi;j<ny;j++) {
|
|
417 double d=y[j]-x[i];
|
|
418 #ifdef DEBUG
|
|
419 Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
|
|
420 #endif
|
|
421 if(d<=max_dist) {
|
|
422 dist.push_back(d);
|
|
423 } else {
|
|
424 break;
|
|
425 }
|
|
426 }
|
|
427 }
|
|
428
|
|
429 SEXP nv;
|
|
430 PROTECT(nv=allocVector(REALSXP,dist.size()));
|
|
431 double* i_nv=REAL(nv);
|
|
432 int i=0;
|
|
433 for(vector<double> ::const_iterator pi=dist.begin();pi!=dist.end();++pi) {
|
|
434 i_nv[i++]=*pi;
|
|
435 }
|
|
436
|
|
437 UNPROTECT(1);
|
|
438 return(nv);
|
|
439 }
|
|
440
|
|
441 // returns a vector giving for each point,
|
|
442 // number of points within a given max_dist
|
|
443 SEXP nwithindist(SEXP x_R,SEXP max_dist_R) {
|
|
444
|
|
445 #ifdef DEBUG
|
|
446 Rprintf("start\n");
|
|
447 #endif
|
|
448 double* x=REAL(x_R);
|
|
449 int nx=LENGTH(x_R);
|
|
450 double max_dist=*REAL(max_dist_R);
|
|
451
|
|
452 SEXP nv;
|
|
453 PROTECT(nv=allocVector(REALSXP,nx));
|
|
454 double* i_nv=REAL(nv);
|
|
455 for(int i=0;i<nx;i++) { i_nv[i]=0; }
|
|
456
|
|
457 #ifdef DEBUG
|
|
458 Rprintf("n=%d; max_dist=%d\n",nx,max_dist);
|
|
459 #endif
|
|
460
|
|
461 for(int i=0;i<nx;i++) {
|
|
462 for(int j=i+1;j<nx;j++) {
|
|
463
|
|
464 double d=x[j]-x[i];
|
|
465 #ifdef DEBUG
|
|
466 Rprintf("i=%d; j=%d; d=%f\n",i,j,d);
|
|
467 #endif
|
|
468 if(d<=max_dist) {
|
|
469 i_nv[i]++;
|
|
470 i_nv[j]++;
|
|
471 } else {
|
|
472 break;
|
|
473 }
|
|
474 }
|
|
475 }
|
|
476
|
|
477 UNPROTECT(1);
|
|
478 return(nv);
|
|
479 }
|
|
480
|
|
481
|
|
482
|
|
483
|
|
484 // given a list of sorted chromosome signal and background vectors (unscaled), determine
|
|
485 // cluster contigs exceeding thr poisson P value, based on a whs window size,
|
|
486 // and satisfying mcs cluster size
|
|
487 SEXP find_poisson_enrichment_clusters(SEXP pos_R,SEXP flag_R,SEXP wsize_R,SEXP thr_R,SEXP mcs_R,SEXP bgm_R,SEXP mintag_R,SEXP either_R) {
|
|
488
|
|
489 #ifdef DEBUG
|
|
490 Rprintf("start\n");
|
|
491 #endif
|
|
492 double* pos=REAL(pos_R);
|
|
493 int* flag=INTEGER(flag_R);
|
|
494 int nt=LENGTH(pos_R);
|
|
495
|
|
496 int mcs=*INTEGER(mcs_R);
|
|
497 int wsize=*INTEGER(wsize_R);
|
|
498 int either=*INTEGER(either_R);
|
|
499 double thr=REAL(thr_R)[0];
|
|
500 double bgm=REAL(bgm_R)[0];
|
|
501 double mintag=REAL(mintag_R)[0];
|
|
502
|
|
503 #ifdef DEBUG
|
|
504 Rprintf("nt=%d; wsize=%d; thr=%f; mcs=%d; min.tag=%f; bgm=%f\n",nt,wsize,thr,mcs,mintag,bgm);
|
|
505 #endif
|
|
506
|
|
507
|
|
508 vector< pair<double,double> > contigs;
|
|
509
|
|
510 // running indecies (start and end)
|
|
511 int si=0;
|
|
512 int ei=0;
|
|
513
|
|
514 // current window coordinate
|
|
515 double ws=pos[0];
|
|
516
|
|
517 // current window tag counts
|
|
518 int cc[2]={0,0};
|
|
519
|
|
520
|
|
521 if(nt>0) {
|
|
522 cc[flag[si]]++;
|
|
523 // increment window end
|
|
524 while(ei<(nt-1) && (pos[ei+1]-ws) <= wsize) {
|
|
525 ei++;
|
|
526 cc[flag[ei]]++;
|
|
527 }
|
|
528
|
|
529
|
|
530 // cluster start,end positions
|
|
531 double cs,ce;
|
|
532 int inclust=0;
|
|
533
|
|
534 while(si<nt-1) {
|
|
535
|
|
536 if((pos[si+1]-ws) > (pos[ei+1] - ws - wsize) && ei!=(nt-1)) {
|
|
537 // move end boudnary
|
|
538 ei++;
|
|
539 ws=pos[ei]-wsize;
|
|
540 cc[flag[ei]]++;
|
|
541 while(ei<(nt-1) && pos[ei+1]==ws+wsize) {
|
|
542 ei++;
|
|
543 cc[flag[ei]]++;
|
|
544 }
|
|
545
|
|
546 // increment window start
|
|
547 while(si<(nt-1) && pos[si] < ws) {
|
|
548 cc[flag[si]]--;
|
|
549 si++;
|
|
550 }
|
|
551
|
|
552 } else {
|
|
553 // move up start boundary
|
|
554 ws=pos[si+1];
|
|
555 cc[flag[si]]--;
|
|
556 si++;
|
|
557 while(si<(nt-1) && pos[si+1]==ws) {
|
|
558 cc[flag[si]]--;
|
|
559 si++;
|
|
560 }
|
|
561
|
|
562 // increment window end
|
|
563 while(ei<(nt-1) && (pos[ei+1] - ws) <= wsize) {
|
|
564 ei++;
|
|
565 cc[flag[ei]]++;
|
|
566 }
|
|
567
|
|
568 }
|
|
569
|
|
570 // calculate z score
|
|
571 double dc0=((double)cc[0])+0.5;
|
|
572 double dc1=((double)cc[1])+0.5;
|
|
573 double rte=dc0+dc1-0.25*thr*thr;
|
|
574 double lb;
|
|
575 if(rte<=0) {
|
|
576 lb=0;
|
|
577 } else {
|
|
578 lb=(sqrt(dc1*dc0) - 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr);
|
|
579 if(lb<0) { lb=0; }
|
|
580 lb*=lb;
|
|
581 }
|
|
582
|
|
583 //Rprintf("%f=f(%f,%f,%f); %f=f(%f,%f,%f)\n",lb,1.0-thr,2.0*dc1,2.0*dc0,ub,thr,2.0*dc1,2.0*dc0);
|
|
584
|
|
585 #ifdef DEBUG
|
|
586 //double ub=gsl_cdf_fdist_Qinv(thr,2.0*dc1,2.0*dc0)*dc1/dc0;
|
|
587 double ub=(sqrt(dc1*dc0) + 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr);
|
|
588 ub*=ub;
|
|
589 Rprintf("s=%d (%f); e=%d (%f); window: %f-%f; cc=[%d,%d]; lb=%f; ub=%f\n",si,pos[si],ei,pos[ei],ws,ws+wsize,cc[0],cc[1],lb,ub);
|
|
590 #endif
|
|
591
|
|
592 int bc=lb>=bgm && cc[1]>=mintag;
|
|
593 if(either) {
|
|
594 bc=lb>=bgm || cc[1]>=mintag;
|
|
595 }
|
|
596 if(bc) {
|
|
597 if(inclust) {
|
|
598 double nce=ws+wsize/2.0;
|
|
599 if(nce-ce > wsize/2.0) {
|
|
600 // next point is too far removed, end cluster
|
|
601 if(ce-cs >= mcs) {
|
|
602 contigs.push_back(pair<double,double>(cs,ce));
|
|
603 #ifdef DEBUG
|
|
604 Rprintf("recorded cluster %f-%f\n",cs,ce);
|
|
605 #endif
|
|
606 }
|
|
607 inclust=0;
|
|
608 } else {
|
|
609 ce=nce;
|
|
610 }
|
|
611 } else {
|
|
612 inclust=1;
|
|
613 cs=ws+wsize/2.0;
|
|
614 ce=cs;
|
|
615 }
|
|
616 } else {
|
|
617 if(inclust) {
|
|
618 if(ce-cs >= mcs) {
|
|
619 contigs.push_back(pair<double,double>(cs,ce));
|
|
620 #ifdef DEBUG
|
|
621 Rprintf("recorded cluster %f-%f\n",cs,ce);
|
|
622 #endif
|
|
623 }
|
|
624 inclust=0;
|
|
625 }
|
|
626 }
|
|
627
|
|
628 }
|
|
629
|
|
630 if(inclust) {
|
|
631 if(ce-cs >= mcs) {
|
|
632 contigs.push_back(pair<double,double>(cs,ce));
|
|
633 #ifdef DEBUG
|
|
634 Rprintf("recorded cluster %f-%f\n",cs,ce);
|
|
635 #endif
|
|
636 }
|
|
637 inclust=0;
|
|
638 }
|
|
639 }
|
|
640
|
|
641 SEXP cs_R,ce_R;
|
|
642 PROTECT(cs_R=allocVector(REALSXP,contigs.size()));
|
|
643 PROTECT(ce_R=allocVector(REALSXP,contigs.size()));
|
|
644 double* csa=REAL(cs_R);
|
|
645 double* cea=REAL(ce_R);
|
|
646
|
|
647 int i=0;
|
|
648 for(vector< pair<double,double> >::const_iterator ci=contigs.begin(); ci!=contigs.end();++ci) {
|
|
649 csa[i]=ci->first;
|
|
650 cea[i]=ci->second;
|
|
651 i++;
|
|
652 }
|
|
653
|
|
654 SEXP ans_R, names_R;
|
|
655 PROTECT(names_R = allocVector(STRSXP, 2));
|
|
656 SET_STRING_ELT(names_R, 0, mkChar("s"));
|
|
657 SET_STRING_ELT(names_R, 1, mkChar("e"));
|
|
658
|
|
659 PROTECT(ans_R = allocVector(VECSXP, 2));
|
|
660 SET_VECTOR_ELT(ans_R, 0, cs_R);
|
|
661 SET_VECTOR_ELT(ans_R, 1, ce_R);
|
|
662 setAttrib(ans_R, R_NamesSymbol, names_R);
|
|
663
|
|
664 UNPROTECT(4);
|
|
665 return(ans_R);
|
|
666
|
|
667 }
|
|
668
|
|
669
|
|
670 // finds intersection between a list of regions
|
|
671 // the flag has +n/-n value, corresponding to the start/end of a segment in n-th regionset
|
|
672 // max_val: 1 - report max overlapping value, -1: report min, 0 - don't look at values
|
|
673 // returns: $s, $e, ($v) lists
|
|
674 SEXP region_intersection(SEXP n_R,SEXP pos_R,SEXP flags_R,SEXP vals_R,SEXP max_val_R,SEXP union_R) {
|
|
675 const int max_val=*INTEGER(max_val_R);
|
|
676 const int unionr=*INTEGER(union_R);
|
|
677 const int n=*INTEGER(n_R);
|
|
678 double* pos=REAL(pos_R);
|
|
679 int* flags=INTEGER(flags_R);
|
|
680 double* val=REAL(vals_R);
|
|
681
|
|
682 #ifdef DEBUG
|
|
683 Rprintf("n=%d; npos=%d; max_val=%d\n",n,LENGTH(pos_R),max_val);
|
|
684 #endif
|
|
685
|
|
686 int s[n]; // flag status for each set
|
|
687 double mv[n]; // max/min value of current clusters
|
|
688
|
|
689 for(int i=0;i<n;i++) { s[i]=0; }
|
|
690
|
|
691 vector<double> starts;
|
|
692 vector<double> ends;
|
|
693 vector<double> values;
|
|
694
|
|
695 int start=-1;
|
|
696 double mval=0;
|
|
697 for(int i=0;i<LENGTH(pos_R);i++) {
|
|
698 // update flags
|
|
699 int f=flags[i];
|
|
700 if(f>0) {
|
|
701 s[abs(f)-1]++;
|
|
702 } else {
|
|
703 s[abs(f)-1]--;
|
|
704 }
|
|
705
|
|
706 if(max_val!=0 && val[i]*max_val > mval*max_val) { mval=val[i]; }
|
|
707
|
|
708 // joined status
|
|
709 int all;
|
|
710 if(unionr) {
|
|
711 all=0;
|
|
712 for(int j=0;j<n;j++) { if(s[j]>0) { all=1; break;} }
|
|
713 } else {
|
|
714 all=1;
|
|
715 for(int j=0;j<n;j++) { all=all & (s[j]>0); }
|
|
716 }
|
|
717
|
|
718
|
|
719 //Rprintf("i=%d; s=[",i);
|
|
720 //for(int j=0;j<n;j++) { Rprintf("%d",s[j]); }
|
|
721 //Rprintf("]; all=%d; start=%d\n",all,start);
|
|
722
|
|
723 if(start>=0) {
|
|
724 // in fragment
|
|
725 if(!all) {
|
|
726 // end fragment
|
|
727 starts.push_back(pos[start]);
|
|
728 ends.push_back(pos[i]);
|
|
729 start=-1;
|
|
730 if(max_val!=0) { values.push_back(mval); }
|
|
731
|
|
732 #ifdef DEBUG
|
|
733 Rprintf("recorded new fragment (s=%f,e=%f,v=%f);\n",pos[start],pos[i],mval);
|
|
734 #endif
|
|
735 }
|
|
736 } else {
|
|
737 // should a fragment be started?
|
|
738 if(all) {
|
|
739 start=i;
|
|
740 if(max_val!=0) { mval=val[i]; }
|
|
741 #ifdef DEBUG
|
|
742 Rprintf("starting new fragment (s=%f,i=%d);\n",pos[start],i);
|
|
743 #endif
|
|
744 }
|
|
745 }
|
|
746 }
|
|
747 SEXP cs_R,ce_R,cv_R;
|
|
748 PROTECT(cs_R=allocVector(REALSXP,starts.size()));
|
|
749 PROTECT(ce_R=allocVector(REALSXP,ends.size()));
|
|
750
|
|
751 double* csa=REAL(cs_R);
|
|
752 int i=0;
|
|
753 for(vector<double>::const_iterator ci=starts.begin(); ci!=starts.end(); ++ci) {
|
|
754 csa[i]=*ci; i++;
|
|
755 }
|
|
756
|
|
757 csa=REAL(ce_R);
|
|
758 i=0;
|
|
759 for(vector<double>::const_iterator ci=ends.begin(); ci!=ends.end(); ++ci) {
|
|
760 csa[i]=*ci; i++;
|
|
761 }
|
|
762
|
|
763 if(max_val!=0) {
|
|
764 PROTECT(cv_R=allocVector(REALSXP,values.size()));
|
|
765 csa=REAL(cv_R);
|
|
766 i=0;
|
|
767 for(vector<double>::const_iterator ci=values.begin(); ci!=values.end(); ++ci) {
|
|
768 csa[i]=*ci; i++;
|
|
769 }
|
|
770 }
|
|
771
|
|
772 SEXP ans_R, names_R;
|
|
773 if(max_val!=0) {
|
|
774 PROTECT(names_R = allocVector(STRSXP, 3));
|
|
775 SET_STRING_ELT(names_R, 0, mkChar("s"));
|
|
776 SET_STRING_ELT(names_R, 1, mkChar("e"));
|
|
777 SET_STRING_ELT(names_R, 2, mkChar("v"));
|
|
778
|
|
779 PROTECT(ans_R = allocVector(VECSXP, 3));
|
|
780 SET_VECTOR_ELT(ans_R, 0, cs_R);
|
|
781 SET_VECTOR_ELT(ans_R, 1, ce_R);
|
|
782 SET_VECTOR_ELT(ans_R, 2, cv_R);
|
|
783 } else {
|
|
784 PROTECT(names_R = allocVector(STRSXP, 2));
|
|
785 SET_STRING_ELT(names_R, 0, mkChar("s"));
|
|
786 SET_STRING_ELT(names_R, 1, mkChar("e"));
|
|
787
|
|
788 PROTECT(ans_R = allocVector(VECSXP, 2));
|
|
789 SET_VECTOR_ELT(ans_R, 0, cs_R);
|
|
790 SET_VECTOR_ELT(ans_R, 1, ce_R);
|
|
791 }
|
|
792
|
|
793 setAttrib(ans_R, R_NamesSymbol, names_R);
|
|
794
|
|
795 if(max_val!=0) {
|
|
796 UNPROTECT(5);
|
|
797 } else {
|
|
798 UNPROTECT(4);
|
|
799 }
|
|
800 return(ans_R);
|
|
801 }
|
|
802
|
|
803 }
|
|
804
|