Mercurial > repos > zzhou > spp_phantompeak
comparison spp/src/peaks.cpp @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:608a8e0eac56 | 6:ce08b0efa3fd |
---|---|
1 #include <vector> | |
2 #include <string.h> | |
3 #include <iostream> | |
4 #include <string> | |
5 #include <set> | |
6 | |
7 extern "C" { | |
8 #include "R.h" | |
9 #include "Rmath.h" | |
10 #include "Rinternals.h" | |
11 #include "Rdefines.h" | |
12 } | |
13 | |
14 using namespace std; | |
15 using namespace __gnu_cxx; | |
16 | |
17 /** | |
18 * Calculate all local peaks | |
19 */ | |
20 | |
21 //#define DEBUG 1 | |
22 | |
23 extern "C" { | |
24 SEXP find_peaks(SEXP x_R,SEXP thr_R,SEXP max_span_R) { | |
25 | |
26 #ifdef DEBUG | |
27 Rprintf("start\n"); | |
28 #endif | |
29 double* x=REAL(x_R); | |
30 int nx=LENGTH(x_R); | |
31 int max_span=*INTEGER(max_span_R); | |
32 double thr=REAL(thr_R)[0]; | |
33 #ifdef DEBUG | |
34 Rprintf("n=%d; thr=%f; max_span=%d\n",nx,thr,max_span); | |
35 #endif | |
36 | |
37 vector<int> pos; | |
38 | |
39 double pv=x[0]; | |
40 double ppv=0; // previous peak value | |
41 int ppp=-max_span-1; // previous peak position | |
42 | |
43 for(int i=1;i<(nx-1);i++) { | |
44 if(x[i]>pv && x[i]>=thr && x[i]>x[i+1]) { | |
45 if(max_span>2) { | |
46 //Rprintf("i=%d; ppp=%d\n",i,ppp); | |
47 if(i-ppp > max_span) { | |
48 if(ppp>=0) { | |
49 pos.push_back(ppp); | |
50 } | |
51 //Rprintf("recorded %d; now %d\n",ppp,i); | |
52 ppp=i; ppv=x[i]; | |
53 } else { | |
54 if(x[i]>ppv) { | |
55 //Rprintf("reset from %d to %d\n",ppp,i); | |
56 ppp=i; ppv=x[i]; | |
57 } | |
58 } | |
59 } else { | |
60 pos.push_back(i); | |
61 } | |
62 } | |
63 if(x[i]!=x[i+1]) { pv=x[i]; } | |
64 } | |
65 | |
66 // add remaining peak | |
67 if(max_span>2 && ppp>=0) { | |
68 pos.push_back(ppp); | |
69 } | |
70 | |
71 SEXP nv; | |
72 PROTECT(nv=allocVector(INTSXP,pos.size())); | |
73 int* i_nv=INTEGER(nv); | |
74 int i=0; | |
75 for(vector<int> ::const_iterator pi=pos.begin();pi!=pos.end();++pi) { | |
76 i_nv[i++]=1+(*pi); | |
77 } | |
78 | |
79 UNPROTECT(1); | |
80 return(nv); | |
81 } | |
82 | |
83 | |
84 | |
85 | |
86 /************************************************************************/ | |
87 // given a data vector d (positive values) and a set of signed center coordinates pos, | |
88 // returns coordinates of data points relative to the centers | |
89 // size is the size of the region around the centers | |
90 // return: vector of relative coordinates (x) and indecies of centers relative the coordinate | |
91 // was calculated (i). | |
92 SEXP get_relative_coordinates(SEXP d_R, | |
93 SEXP pos_R, | |
94 SEXP size_R) | |
95 { | |
96 int *d, *pos; | |
97 int npos,nd,size; | |
98 | |
99 d = INTEGER(d_R); pos = INTEGER(pos_R); | |
100 npos=LENGTH(pos_R); nd=LENGTH(d_R); | |
101 size = INTEGER(size_R)[0]; | |
102 #ifdef DEBUG | |
103 Rprintf("|d|=%d, |c|=%d, size=%d\n",nd,npos,size); | |
104 #endif | |
105 | |
106 vector<int> x; vector<int> xi; | |
107 int k=0; // current pos index | |
108 | |
109 for(int i=0;i<nd;i++) { | |
110 // increment k until pos[k]+size>=d[i] | |
111 while((abs(pos[k])+size) < d[i]) { k++; if(k==npos) { break; }; | |
112 #ifdef DEBUG | |
113 Rprintf("advancing k to %d\n",k); | |
114 #endif | |
115 } | |
116 if(k==npos) { break; }; | |
117 // increment i until d[i]>=pos[k]-size | |
118 while((abs(pos[k])-size) > d[i]) { i++; if(i==nd) { break; } | |
119 #ifdef DEBUG | |
120 Rprintf("advancing i to %d\n",i); | |
121 #endif | |
122 } | |
123 if(i==nd) { break; } | |
124 | |
125 | |
126 int l=k; | |
127 while((l<npos) && ((abs(pos[l])-size) <= d[i])) { l++; | |
128 #ifdef DEBUG | |
129 Rprintf("advancing l to %d\n",l); | |
130 #endif | |
131 } | |
132 for(int j=k;j<l;j++) { | |
133 int pd=d[i]-abs(pos[j]); | |
134 if(abs(pd)<=size) { | |
135 // record | |
136 if(pos[j]>0) { | |
137 x.push_back(pd); | |
138 } else { | |
139 x.push_back(-1*pd); | |
140 } | |
141 xi.push_back(j); | |
142 #ifdef DEBUG | |
143 Rprintf("recorded i=%d, j=%d\n",i,j); | |
144 #endif | |
145 } else { | |
146 break; | |
147 } | |
148 } | |
149 } | |
150 | |
151 SEXP xv_R,xiv_R; | |
152 PROTECT(xv_R=allocVector(INTSXP,x.size())); | |
153 PROTECT(xiv_R=allocVector(INTSXP,x.size())); | |
154 int* xv=INTEGER(xv_R); | |
155 int* xiv=INTEGER(xiv_R); | |
156 | |
157 int i=0; | |
158 for(vector<int> ::const_iterator pi=x.begin();pi!=x.end();++pi) { | |
159 xv[i++]=*pi; | |
160 } | |
161 i=0; | |
162 for(vector<int> ::const_iterator pi=xi.begin();pi!=xi.end();++pi) { | |
163 xiv[i++]=1+(*pi); | |
164 } | |
165 | |
166 SEXP ans_R, names_R; | |
167 PROTECT(names_R = allocVector(STRSXP, 2)); | |
168 SET_STRING_ELT(names_R, 0, mkChar("x")); | |
169 SET_STRING_ELT(names_R, 1, mkChar("i")); | |
170 | |
171 PROTECT(ans_R = allocVector(VECSXP, 2)); | |
172 SET_VECTOR_ELT(ans_R, 0, xv_R); | |
173 SET_VECTOR_ELT(ans_R, 1, xiv_R); | |
174 setAttrib(ans_R, R_NamesSymbol, names_R); | |
175 | |
176 UNPROTECT(4); | |
177 return(ans_R); | |
178 } | |
179 | |
180 | |
181 // determines a set of points within a set of fragments | |
182 // note: all vectors sorted in ascending order | |
183 // note: all vectors are integers | |
184 // x_R - vector of point positions | |
185 // se_R - vector of start and end positions | |
186 // fi_R - vector of signed fragment indecies | |
187 // return_list_R - whether a list of fragments should be returned for each point | |
188 // return_unique_R - whether points in multiple fragments should be omitted | |
189 SEXP points_within(SEXP x_R,SEXP se_R,SEXP fi_R,SEXP return_list_R,SEXP return_unique_R,SEXP return_point_counts_R) { | |
190 #ifdef DEBUG | |
191 Rprintf("start\n"); | |
192 #endif | |
193 int* x=INTEGER(x_R); | |
194 int nx=LENGTH(x_R); | |
195 int* se=INTEGER(se_R); | |
196 int* fi=INTEGER(fi_R); | |
197 int nf=LENGTH(se_R); | |
198 | |
199 int return_list=*(INTEGER(return_list_R)); | |
200 int return_unique=*(INTEGER(return_unique_R)); | |
201 int return_point_counts=*(INTEGER(return_point_counts_R)); | |
202 | |
203 #ifdef DEBUG | |
204 Rprintf("nf=%d; nx=%d, return_list=%d, return_unique=%d, return_point_counts=%d\n",nf/2,nx,return_list,return_unique,return_point_counts); | |
205 #endif | |
206 set<int> fset; | |
207 | |
208 | |
209 SEXP nv; int *i_nv; | |
210 int np=0; | |
211 if(return_point_counts) { | |
212 PROTECT(nv = allocVector(INTSXP, nf/2)); np++; | |
213 i_nv=INTEGER(nv); | |
214 for(int i=0;i<nf/2;i++) { i_nv[i]=0; } | |
215 } else if(return_list) { | |
216 PROTECT(nv = allocVector(VECSXP, nx)); np++; | |
217 } else { | |
218 PROTECT(nv=allocVector(INTSXP,nx)); np++; | |
219 i_nv=INTEGER(nv); | |
220 } | |
221 | |
222 int j=0; | |
223 | |
224 for(int i=0;i<nx;i++) { | |
225 // advance j | |
226 while(j<nf && se[j]<x[i]) { | |
227 int frag=fi[j]; | |
228 if(frag>0) { // insert | |
229 fset.insert(frag); | |
230 #ifdef DEBUG | |
231 Rprintf("inserted frag %d, size=%d\n",frag,fset.size()); | |
232 #endif | |
233 } else { // remove | |
234 fset.erase(-frag); | |
235 #ifdef DEBUG | |
236 Rprintf("removed frag %d, size=%d\n",-frag,fset.size()); | |
237 #endif | |
238 } | |
239 j++; | |
240 } | |
241 #ifdef DEBUG | |
242 Rprintf("i=%d j=%d\n",i,j); | |
243 #endif | |
244 if(return_list) { | |
245 if(fset.empty() || (return_unique && fset.size()>1)) { | |
246 // assign null list? | |
247 } else { | |
248 SEXP fil_R; | |
249 PROTECT(fil_R=allocVector(INTSXP,fset.size())); np++; | |
250 int* fil=INTEGER(fil_R); | |
251 int k=0; | |
252 for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) { | |
253 fil[k]=*ki; k++; | |
254 } | |
255 SET_VECTOR_ELT(nv, i, fil_R); | |
256 UNPROTECT(1); np--; | |
257 } | |
258 } else { | |
259 if(return_point_counts) { | |
260 for(set<int>::const_iterator ki=fset.begin();ki!=fset.end();++ki) { | |
261 i_nv[*ki-1]++; | |
262 } | |
263 } else { | |
264 if(fset.empty() || (return_unique && fset.size()>1)) { | |
265 i_nv[i]=-1; | |
266 } else { | |
267 i_nv[i]=*fset.begin(); | |
268 } | |
269 } | |
270 } | |
271 } | |
272 | |
273 UNPROTECT(np); | |
274 return(nv); | |
275 } | |
276 | |
277 | |
278 SEXP expuni_lr(SEXP x_R, // positions and their number (assumed sorted in ascending order) | |
279 SEXP mdist_R, // max distance at which points should be considered | |
280 SEXP lambda_R, // lambda value | |
281 SEXP spos_R, // starting position | |
282 SEXP epos_R, // ending position | |
283 SEXP step_R, // step size | |
284 SEXP return_peaks_R, // whether peak positions should be returned, or entire score vector | |
285 SEXP min_peak_lr_R // min peak height (lr) | |
286 ) | |
287 { | |
288 | |
289 #ifdef DEBUG | |
290 Rprintf("start\n"); | |
291 #endif | |
292 int* x=INTEGER(x_R); | |
293 int nx=LENGTH(x_R); | |
294 int mdist=INTEGER(mdist_R)[0]; | |
295 double lambda=*(REAL(lambda_R)); | |
296 | |
297 int return_peaks=*(INTEGER(return_peaks_R)); | |
298 double min_peak=*(REAL(min_peak_lr_R)); | |
299 | |
300 int spos=*(INTEGER(spos_R)); | |
301 int epos=*(INTEGER(epos_R)); | |
302 int step=*(INTEGER(step_R)); | |
303 | |
304 int nsteps=(int) (epos-spos)/step; | |
305 | |
306 | |
307 #ifdef DEBUG | |
308 Rprintf("n=%d; lambda=%f; mdist=%d; spos=%d; epos=%d; step=%d; nsteps=%d\n",nx,lambda,mdist,spos,epos,step,nsteps); | |
309 #endif | |
310 | |
311 | |
312 SEXP nv; | |
313 double *d_nv; | |
314 if(!return_peaks) { | |
315 PROTECT(nv=allocVector(REALSXP,nsteps+1)); | |
316 d_nv=REAL(nv); | |
317 } | |
318 | |
319 | |
320 int i=0; // current index of the first point being used in the calculations | |
321 int j=0; // current index of the last point being used in the calculations | |
322 int sx=0; // current sum of all positions | |
323 int n=0; | |
324 | |
325 for(int k=0; k<=nsteps; k++) { | |
326 int cpos=spos+k*step; | |
327 // increase i until x[i]>=cpos-mdist; remove x from sx; decrement n; | |
328 while(i<nx && x[i]<(cpos-mdist)) { | |
329 n--; sx-=x[i]; i++; | |
330 //Rprintf("incremented i: i=%d; n=%d; sx=%d; cpos-mdist=%d; x[i-1]=%d\n",i,n,sx,cpos-mdist,x[i-1]); | |
331 } | |
332 //Rprintf("stable i: i=%d; n=%d; sx=%d; cpos-mdist=%d; x[i-1]=%d\n",i,n,sx,cpos-mdist,x[i-1]); | |
333 | |
334 //if(i>j) { j=i; } | |
335 | |
336 // increase j until x[j]>cpos | |
337 while(j<nx && x[j]<=cpos) { | |
338 n++; sx+=x[j]; j++; | |
339 //Rprintf("incremented j: j=%d; n=%d; sx=%d; cpos=%d; x[j-1]=%d\n",j,n,sx,cpos,x[j-1]); | |
340 } | |
341 //Rprintf("stable j: j=%d; n=%d; sx=%d; cpos=%d; x[j-1]=%d\n",j,n,sx,cpos,x[j]); | |
342 | |
343 // calculate lr | |
344 d_nv[k]=((double)(1-n))*log(lambda)-lambda*((double)(n*(cpos+1)-sx)); | |
345 //Rprintf("recorded lr[%d]=%f\n",k-1,d_nv[k-1]); | |
346 } | |
347 UNPROTECT(1); | |
348 return(nv); | |
349 } | |
350 | |
351 | |
352 SEXP allpdist(SEXP x_R,SEXP max_dist_R) { | |
353 | |
354 #ifdef DEBUG | |
355 Rprintf("start\n"); | |
356 #endif | |
357 double* x=REAL(x_R); | |
358 int nx=LENGTH(x_R); | |
359 double max_dist=*REAL(max_dist_R); | |
360 #ifdef DEBUG | |
361 Rprintf("n=%d; max_dist=%d\n",nx,max_dist); | |
362 #endif | |
363 | |
364 vector<double> dist; | |
365 | |
366 for(int i=0;i<nx;i++) { | |
367 for(int j=i+1;j<nx;j++) { | |
368 | |
369 double d=x[j]-x[i]; | |
370 #ifdef DEBUG | |
371 Rprintf("i=%d; j=%d; d=%f\n",i,j,d); | |
372 #endif | |
373 if(d<=max_dist) { | |
374 dist.push_back(d); | |
375 } else { | |
376 break; | |
377 } | |
378 } | |
379 } | |
380 | |
381 SEXP nv; | |
382 PROTECT(nv=allocVector(REALSXP,dist.size())); | |
383 double* i_nv=REAL(nv); | |
384 int i=0; | |
385 for(vector<double> ::const_iterator pi=dist.begin();pi!=dist.end();++pi) { | |
386 i_nv[i++]=*pi; | |
387 } | |
388 | |
389 UNPROTECT(1); | |
390 return(nv); | |
391 } | |
392 | |
393 // same as above, but for two different sets | |
394 SEXP allxpdist(SEXP x_R,SEXP y_R, SEXP max_dist_R) { | |
395 | |
396 #ifdef DEBUG | |
397 Rprintf("start\n"); | |
398 #endif | |
399 double* x=REAL(x_R); | |
400 double* y=REAL(y_R); | |
401 int nx=LENGTH(x_R); | |
402 int ny=LENGTH(y_R); | |
403 double max_dist=*REAL(max_dist_R); | |
404 #ifdef DEBUG | |
405 Rprintf("nx=%d; ny=%d; max_dist=%d\n",nx,ny,max_dist); | |
406 #endif | |
407 | |
408 vector<double> dist; | |
409 int yi=0; // latest y start index | |
410 | |
411 for(int i=0;i<nx;i++) { | |
412 // adjust yi so that yi>=x[i]-max_dist_R | |
413 while(y[yi]<(x[i]-max_dist) && yi<ny) { yi++; } | |
414 if(yi==ny) { break; } | |
415 | |
416 for(int j=yi;j<ny;j++) { | |
417 double d=y[j]-x[i]; | |
418 #ifdef DEBUG | |
419 Rprintf("i=%d; j=%d; d=%f\n",i,j,d); | |
420 #endif | |
421 if(d<=max_dist) { | |
422 dist.push_back(d); | |
423 } else { | |
424 break; | |
425 } | |
426 } | |
427 } | |
428 | |
429 SEXP nv; | |
430 PROTECT(nv=allocVector(REALSXP,dist.size())); | |
431 double* i_nv=REAL(nv); | |
432 int i=0; | |
433 for(vector<double> ::const_iterator pi=dist.begin();pi!=dist.end();++pi) { | |
434 i_nv[i++]=*pi; | |
435 } | |
436 | |
437 UNPROTECT(1); | |
438 return(nv); | |
439 } | |
440 | |
441 // returns a vector giving for each point, | |
442 // number of points within a given max_dist | |
443 SEXP nwithindist(SEXP x_R,SEXP max_dist_R) { | |
444 | |
445 #ifdef DEBUG | |
446 Rprintf("start\n"); | |
447 #endif | |
448 double* x=REAL(x_R); | |
449 int nx=LENGTH(x_R); | |
450 double max_dist=*REAL(max_dist_R); | |
451 | |
452 SEXP nv; | |
453 PROTECT(nv=allocVector(REALSXP,nx)); | |
454 double* i_nv=REAL(nv); | |
455 for(int i=0;i<nx;i++) { i_nv[i]=0; } | |
456 | |
457 #ifdef DEBUG | |
458 Rprintf("n=%d; max_dist=%d\n",nx,max_dist); | |
459 #endif | |
460 | |
461 for(int i=0;i<nx;i++) { | |
462 for(int j=i+1;j<nx;j++) { | |
463 | |
464 double d=x[j]-x[i]; | |
465 #ifdef DEBUG | |
466 Rprintf("i=%d; j=%d; d=%f\n",i,j,d); | |
467 #endif | |
468 if(d<=max_dist) { | |
469 i_nv[i]++; | |
470 i_nv[j]++; | |
471 } else { | |
472 break; | |
473 } | |
474 } | |
475 } | |
476 | |
477 UNPROTECT(1); | |
478 return(nv); | |
479 } | |
480 | |
481 | |
482 | |
483 | |
484 // given a list of sorted chromosome signal and background vectors (unscaled), determine | |
485 // cluster contigs exceeding thr poisson P value, based on a whs window size, | |
486 // and satisfying mcs cluster size | |
487 SEXP find_poisson_enrichment_clusters(SEXP pos_R,SEXP flag_R,SEXP wsize_R,SEXP thr_R,SEXP mcs_R,SEXP bgm_R,SEXP mintag_R,SEXP either_R) { | |
488 | |
489 #ifdef DEBUG | |
490 Rprintf("start\n"); | |
491 #endif | |
492 double* pos=REAL(pos_R); | |
493 int* flag=INTEGER(flag_R); | |
494 int nt=LENGTH(pos_R); | |
495 | |
496 int mcs=*INTEGER(mcs_R); | |
497 int wsize=*INTEGER(wsize_R); | |
498 int either=*INTEGER(either_R); | |
499 double thr=REAL(thr_R)[0]; | |
500 double bgm=REAL(bgm_R)[0]; | |
501 double mintag=REAL(mintag_R)[0]; | |
502 | |
503 #ifdef DEBUG | |
504 Rprintf("nt=%d; wsize=%d; thr=%f; mcs=%d; min.tag=%f; bgm=%f\n",nt,wsize,thr,mcs,mintag,bgm); | |
505 #endif | |
506 | |
507 | |
508 vector< pair<double,double> > contigs; | |
509 | |
510 // running indecies (start and end) | |
511 int si=0; | |
512 int ei=0; | |
513 | |
514 // current window coordinate | |
515 double ws=pos[0]; | |
516 | |
517 // current window tag counts | |
518 int cc[2]={0,0}; | |
519 | |
520 | |
521 if(nt>0) { | |
522 cc[flag[si]]++; | |
523 // increment window end | |
524 while(ei<(nt-1) && (pos[ei+1]-ws) <= wsize) { | |
525 ei++; | |
526 cc[flag[ei]]++; | |
527 } | |
528 | |
529 | |
530 // cluster start,end positions | |
531 double cs,ce; | |
532 int inclust=0; | |
533 | |
534 while(si<nt-1) { | |
535 | |
536 if((pos[si+1]-ws) > (pos[ei+1] - ws - wsize) && ei!=(nt-1)) { | |
537 // move end boudnary | |
538 ei++; | |
539 ws=pos[ei]-wsize; | |
540 cc[flag[ei]]++; | |
541 while(ei<(nt-1) && pos[ei+1]==ws+wsize) { | |
542 ei++; | |
543 cc[flag[ei]]++; | |
544 } | |
545 | |
546 // increment window start | |
547 while(si<(nt-1) && pos[si] < ws) { | |
548 cc[flag[si]]--; | |
549 si++; | |
550 } | |
551 | |
552 } else { | |
553 // move up start boundary | |
554 ws=pos[si+1]; | |
555 cc[flag[si]]--; | |
556 si++; | |
557 while(si<(nt-1) && pos[si+1]==ws) { | |
558 cc[flag[si]]--; | |
559 si++; | |
560 } | |
561 | |
562 // increment window end | |
563 while(ei<(nt-1) && (pos[ei+1] - ws) <= wsize) { | |
564 ei++; | |
565 cc[flag[ei]]++; | |
566 } | |
567 | |
568 } | |
569 | |
570 // calculate z score | |
571 double dc0=((double)cc[0])+0.5; | |
572 double dc1=((double)cc[1])+0.5; | |
573 double rte=dc0+dc1-0.25*thr*thr; | |
574 double lb; | |
575 if(rte<=0) { | |
576 lb=0; | |
577 } else { | |
578 lb=(sqrt(dc1*dc0) - 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr); | |
579 if(lb<0) { lb=0; } | |
580 lb*=lb; | |
581 } | |
582 | |
583 //Rprintf("%f=f(%f,%f,%f); %f=f(%f,%f,%f)\n",lb,1.0-thr,2.0*dc1,2.0*dc0,ub,thr,2.0*dc1,2.0*dc0); | |
584 | |
585 #ifdef DEBUG | |
586 //double ub=gsl_cdf_fdist_Qinv(thr,2.0*dc1,2.0*dc0)*dc1/dc0; | |
587 double ub=(sqrt(dc1*dc0) + 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr); | |
588 ub*=ub; | |
589 Rprintf("s=%d (%f); e=%d (%f); window: %f-%f; cc=[%d,%d]; lb=%f; ub=%f\n",si,pos[si],ei,pos[ei],ws,ws+wsize,cc[0],cc[1],lb,ub); | |
590 #endif | |
591 | |
592 int bc=lb>=bgm && cc[1]>=mintag; | |
593 if(either) { | |
594 bc=lb>=bgm || cc[1]>=mintag; | |
595 } | |
596 if(bc) { | |
597 if(inclust) { | |
598 double nce=ws+wsize/2.0; | |
599 if(nce-ce > wsize/2.0) { | |
600 // next point is too far removed, end cluster | |
601 if(ce-cs >= mcs) { | |
602 contigs.push_back(pair<double,double>(cs,ce)); | |
603 #ifdef DEBUG | |
604 Rprintf("recorded cluster %f-%f\n",cs,ce); | |
605 #endif | |
606 } | |
607 inclust=0; | |
608 } else { | |
609 ce=nce; | |
610 } | |
611 } else { | |
612 inclust=1; | |
613 cs=ws+wsize/2.0; | |
614 ce=cs; | |
615 } | |
616 } else { | |
617 if(inclust) { | |
618 if(ce-cs >= mcs) { | |
619 contigs.push_back(pair<double,double>(cs,ce)); | |
620 #ifdef DEBUG | |
621 Rprintf("recorded cluster %f-%f\n",cs,ce); | |
622 #endif | |
623 } | |
624 inclust=0; | |
625 } | |
626 } | |
627 | |
628 } | |
629 | |
630 if(inclust) { | |
631 if(ce-cs >= mcs) { | |
632 contigs.push_back(pair<double,double>(cs,ce)); | |
633 #ifdef DEBUG | |
634 Rprintf("recorded cluster %f-%f\n",cs,ce); | |
635 #endif | |
636 } | |
637 inclust=0; | |
638 } | |
639 } | |
640 | |
641 SEXP cs_R,ce_R; | |
642 PROTECT(cs_R=allocVector(REALSXP,contigs.size())); | |
643 PROTECT(ce_R=allocVector(REALSXP,contigs.size())); | |
644 double* csa=REAL(cs_R); | |
645 double* cea=REAL(ce_R); | |
646 | |
647 int i=0; | |
648 for(vector< pair<double,double> >::const_iterator ci=contigs.begin(); ci!=contigs.end();++ci) { | |
649 csa[i]=ci->first; | |
650 cea[i]=ci->second; | |
651 i++; | |
652 } | |
653 | |
654 SEXP ans_R, names_R; | |
655 PROTECT(names_R = allocVector(STRSXP, 2)); | |
656 SET_STRING_ELT(names_R, 0, mkChar("s")); | |
657 SET_STRING_ELT(names_R, 1, mkChar("e")); | |
658 | |
659 PROTECT(ans_R = allocVector(VECSXP, 2)); | |
660 SET_VECTOR_ELT(ans_R, 0, cs_R); | |
661 SET_VECTOR_ELT(ans_R, 1, ce_R); | |
662 setAttrib(ans_R, R_NamesSymbol, names_R); | |
663 | |
664 UNPROTECT(4); | |
665 return(ans_R); | |
666 | |
667 } | |
668 | |
669 | |
670 // finds intersection between a list of regions | |
671 // the flag has +n/-n value, corresponding to the start/end of a segment in n-th regionset | |
672 // max_val: 1 - report max overlapping value, -1: report min, 0 - don't look at values | |
673 // returns: $s, $e, ($v) lists | |
674 SEXP region_intersection(SEXP n_R,SEXP pos_R,SEXP flags_R,SEXP vals_R,SEXP max_val_R,SEXP union_R) { | |
675 const int max_val=*INTEGER(max_val_R); | |
676 const int unionr=*INTEGER(union_R); | |
677 const int n=*INTEGER(n_R); | |
678 double* pos=REAL(pos_R); | |
679 int* flags=INTEGER(flags_R); | |
680 double* val=REAL(vals_R); | |
681 | |
682 #ifdef DEBUG | |
683 Rprintf("n=%d; npos=%d; max_val=%d\n",n,LENGTH(pos_R),max_val); | |
684 #endif | |
685 | |
686 int s[n]; // flag status for each set | |
687 double mv[n]; // max/min value of current clusters | |
688 | |
689 for(int i=0;i<n;i++) { s[i]=0; } | |
690 | |
691 vector<double> starts; | |
692 vector<double> ends; | |
693 vector<double> values; | |
694 | |
695 int start=-1; | |
696 double mval=0; | |
697 for(int i=0;i<LENGTH(pos_R);i++) { | |
698 // update flags | |
699 int f=flags[i]; | |
700 if(f>0) { | |
701 s[abs(f)-1]++; | |
702 } else { | |
703 s[abs(f)-1]--; | |
704 } | |
705 | |
706 if(max_val!=0 && val[i]*max_val > mval*max_val) { mval=val[i]; } | |
707 | |
708 // joined status | |
709 int all; | |
710 if(unionr) { | |
711 all=0; | |
712 for(int j=0;j<n;j++) { if(s[j]>0) { all=1; break;} } | |
713 } else { | |
714 all=1; | |
715 for(int j=0;j<n;j++) { all=all & (s[j]>0); } | |
716 } | |
717 | |
718 | |
719 //Rprintf("i=%d; s=[",i); | |
720 //for(int j=0;j<n;j++) { Rprintf("%d",s[j]); } | |
721 //Rprintf("]; all=%d; start=%d\n",all,start); | |
722 | |
723 if(start>=0) { | |
724 // in fragment | |
725 if(!all) { | |
726 // end fragment | |
727 starts.push_back(pos[start]); | |
728 ends.push_back(pos[i]); | |
729 start=-1; | |
730 if(max_val!=0) { values.push_back(mval); } | |
731 | |
732 #ifdef DEBUG | |
733 Rprintf("recorded new fragment (s=%f,e=%f,v=%f);\n",pos[start],pos[i],mval); | |
734 #endif | |
735 } | |
736 } else { | |
737 // should a fragment be started? | |
738 if(all) { | |
739 start=i; | |
740 if(max_val!=0) { mval=val[i]; } | |
741 #ifdef DEBUG | |
742 Rprintf("starting new fragment (s=%f,i=%d);\n",pos[start],i); | |
743 #endif | |
744 } | |
745 } | |
746 } | |
747 SEXP cs_R,ce_R,cv_R; | |
748 PROTECT(cs_R=allocVector(REALSXP,starts.size())); | |
749 PROTECT(ce_R=allocVector(REALSXP,ends.size())); | |
750 | |
751 double* csa=REAL(cs_R); | |
752 int i=0; | |
753 for(vector<double>::const_iterator ci=starts.begin(); ci!=starts.end(); ++ci) { | |
754 csa[i]=*ci; i++; | |
755 } | |
756 | |
757 csa=REAL(ce_R); | |
758 i=0; | |
759 for(vector<double>::const_iterator ci=ends.begin(); ci!=ends.end(); ++ci) { | |
760 csa[i]=*ci; i++; | |
761 } | |
762 | |
763 if(max_val!=0) { | |
764 PROTECT(cv_R=allocVector(REALSXP,values.size())); | |
765 csa=REAL(cv_R); | |
766 i=0; | |
767 for(vector<double>::const_iterator ci=values.begin(); ci!=values.end(); ++ci) { | |
768 csa[i]=*ci; i++; | |
769 } | |
770 } | |
771 | |
772 SEXP ans_R, names_R; | |
773 if(max_val!=0) { | |
774 PROTECT(names_R = allocVector(STRSXP, 3)); | |
775 SET_STRING_ELT(names_R, 0, mkChar("s")); | |
776 SET_STRING_ELT(names_R, 1, mkChar("e")); | |
777 SET_STRING_ELT(names_R, 2, mkChar("v")); | |
778 | |
779 PROTECT(ans_R = allocVector(VECSXP, 3)); | |
780 SET_VECTOR_ELT(ans_R, 0, cs_R); | |
781 SET_VECTOR_ELT(ans_R, 1, ce_R); | |
782 SET_VECTOR_ELT(ans_R, 2, cv_R); | |
783 } else { | |
784 PROTECT(names_R = allocVector(STRSXP, 2)); | |
785 SET_STRING_ELT(names_R, 0, mkChar("s")); | |
786 SET_STRING_ELT(names_R, 1, mkChar("e")); | |
787 | |
788 PROTECT(ans_R = allocVector(VECSXP, 2)); | |
789 SET_VECTOR_ELT(ans_R, 0, cs_R); | |
790 SET_VECTOR_ELT(ans_R, 1, ce_R); | |
791 } | |
792 | |
793 setAttrib(ans_R, R_NamesSymbol, names_R); | |
794 | |
795 if(max_val!=0) { | |
796 UNPROTECT(5); | |
797 } else { | |
798 UNPROTECT(4); | |
799 } | |
800 return(ans_R); | |
801 } | |
802 | |
803 } | |
804 |