Mercurial > repos > zzhou > spp_phantompeak
view spp/src/wdl.cpp @ 15:e689b83b0257 draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:15:21 -0500 |
parents | ce08b0efa3fd |
children |
line wrap: on
line source
#include <vector> #include <string.h> #include <iostream> #include <string> #include <set> extern "C" { #include "R.h" #include "Rmath.h" #include "Rinternals.h" #include "Rdefines.h" } using namespace std; using namespace __gnu_cxx; //#define DEBUG 1 extern "C" { /************************************************************************/ /* * lwcc - calculate local window cross-correlation */ SEXP lwcc(SEXP x_R, // positive strand hist SEXP y_R, // negative strand hist of the same length SEXP osize_R, // outer boundary distance SEXP isize_R, // inner boundary distance SEXP return_peaks_R, // whether all correlation values, or just peaks should be returned SEXP min_peak_dist_R, // distance between closest peaks SEXP min_peak_val_R, // min peak threshold SEXP tag_weight_R, // tag weight SEXP bg_subtract_R, // a flag whether do background subtractio SEXP bgp_R, // optional background hist for positive strand SEXP bgn_R, // optional background hist for negative strand SEXP bg_wsize_R, // window size for the background counts SEXP bg_weight_R, // optional weighting for the background tags, must compensate for window size difference (including is cutout) SEXP round_up_R // whether to round up fractional signal tag counts ) { #ifdef DEBUG Rprintf("start\n"); #endif int is=INTEGER(isize_R)[0]; int os=INTEGER(osize_R)[0]; double rs=((double)(2*os+1)); int* x=INTEGER(x_R); int* y=INTEGER(y_R); int n_x=LENGTH(x_R); // background-related int* bgp=INTEGER(bgp_R); int* bgn=INTEGER(bgn_R); int bg_whs=INTEGER(bg_wsize_R)[0]; int return_peaks=*(INTEGER(return_peaks_R)); double min_peak_val=*(REAL(min_peak_val_R)); int min_peak_dist=*(INTEGER(min_peak_dist_R)); double tag_weight=*(REAL(tag_weight_R)); const int round_up=*(INTEGER(round_up_R)); const int bg_subtract=*(INTEGER(bg_subtract_R)); const double bg_weight=*(REAL(bg_weight_R)); int i; // point at which the value is being calculated int start=os; int end=n_x-os-1; // bg tag counts within bg window int bg_pn1=0; int bg_nn1=0; int bg_pn2=0; int bg_nn2=0; // illustration for counting: // // 012345678901234567890123456789012 // ==========------|------========== // // osize=16; isize=6; SEXP nv; double *d_nv; vector<int> ppos; vector<double> pval; if(!return_peaks) { PROTECT(nv=allocVector(REALSXP,n_x)); d_nv=REAL(nv); for(int i=0;i<n_x;i++) { d_nv[i]=0; } } #ifdef DEBUG Rprintf("start=%d end=%d tag_weight=%f\n", start,end,tag_weight); Rprintf("x[1]=%d x[2]=%d y[1]=%d y[2]=%d\n",x[1],x[2],y[1],y[2]); #endif int lpp=-1; // last peak position double lpv=-1e3; // last peak value double ppv=-1e3; // last value double pppv=-11e-3; // value before last int pn1,pn2,nn1,nn2; if(bg_subtract) { // pre-initialize background tag counts, for(int i=0;i<bg_whs;i++) { if(i<n_x) { bg_pn2+=bgp[i]; bg_nn2+=bgn[i]; } } } for(i=0;i<end;i++) { #ifdef DEBUG //Rprintf("i=%d ", i); #endif if(bg_subtract) { // update background counts int nl=i-bg_whs-1; if(nl>=0) { bg_pn1-=bgp[nl]; bg_nn1-=bgn[nl]; } bg_pn1+=bgp[i]; bg_nn1+=bgn[i]; if(i>0) { bg_pn2-=bgp[i-1]; bg_nn2-=bgn[i-1]; } int nr=i+bg_whs; if(nr<n_x) { bg_pn2+=bgp[nr]; bg_nn2+=bgn[nr]; } } if(i >= start) { // update counts, taking into account masked out regions pn1=pn2=nn1=nn2=0; for(int k=0;k<=(os-is);k++) { int xp1=x[i-os+k]; int xp2=x[i+os-k]; int xn1=y[i+os-k]; int xn2=y[i-os+k]; if(xp1!=-1 && xn1!=-1) { pn1+=xp1; nn1+=xn1; } if(xp2!=-1 && xn2!=-1) { pn2+=xp2; nn2+=xn2; } } // calculate the means double mp=((double)(pn1+pn2))/rs; double mn=((double)(pn1+pn2))/rs; #ifdef DEBUG Rprintf("mp=%f mn=%f\n",mp,mn); #endif // calculate correlation double varp=0; double varn=0; double num=0; double val=-1e3; if(mp>0 & mn>0) { for(int k=0;k<=(os-is);k++) { int xp1=x[i-os+k]; int xp2=x[i+os-k]; int xn1=y[i+os-k]; int xn2=y[i-os+k]; if(xp1!=-1 && xn1!=-1) { double nnp1=((double) xp1)-mp; double nnn1=((double) xn1)-mn; num+=nnp1*nnn1; varp+=nnp1*nnp1; varn+=nnn1*nnn1; } if(xp2!=-1 && xn2!=-1) { double nnp2=((double) xp2)-mp; double nnn2=((double) xn2)-mn; num+=nnp2*nnn2; varp+=nnp2*nnp2; varn+=nnn2*nnn2; } } double tagw; double spn1=((double)pn1)*tag_weight; double snn1=((double)nn1)*tag_weight; double spn2=((double)pn2)*tag_weight; double snn2=((double)nn2)*tag_weight; if(round_up) { if(pn1>0 && spn1<1) { spn1=1.0; } //if(pn2>0 && spn2<1) { spn2=1.0; } if(nn1>0 && snn1<1) { snn1=1.0; } //if(nn2>0 && snn2<1) { snn2=1.0; } } if(bg_subtract) { spn1-=((double)bg_pn1)*bg_weight; snn1-=((double)bg_nn2)*bg_weight; spn2-=((double)bg_pn2)*bg_weight; snn2-=((double)bg_nn1)*bg_weight; if(spn2<0) spn2=0; if(snn2<0) snn2=0; if(spn1>0 && snn1>0) { tagw=(2.0*sqrt(spn1*snn1)-(spn2+snn2+1.0)); } else { tagw=-(spn2+snn2+1.0); } //cout<<"bg_pn1="<<bg_pn1<<"; bg_pn2="<<bg_pn2<<"; bg_nn1="<<bg_nn1<<"; bg_nn2="<<bg_nn2<<endl; } else { tagw=2.0*sqrt(spn1*snn1)-(spn2+snn2); } if(tagw<0) { val=0.0; } else { if(num==0.0) { val=0; } else { val=num/(sqrt(varp*varn)); } val=val*sqrt(tagw) + tagw; } //cout<<"val="<<val<<endl; #ifdef DEBUG Rprintf("pn1=%d pn2=%d nn1=%d nn2=%d tag.weight=%f tagw=%f\n",pn1,pn2,nn1,nn2,tag_weight,tagw); Rprintf("tagw=%f varp=%f varn=%f num=%f cor=%f val=%f\n",tagw,varp,varn,num,num/sqrt(varp*varn),val); #endif } if(return_peaks) { // determine if previous position was a peak if(ppv>min_peak_val && ppv>val && ppv>pppv) { if(lpp>0 && (i-lpp+1)>min_peak_dist) { // record previous peak position ppos.push_back(lpp); pval.push_back(lpv); #ifdef DEBUG Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp)); #endif lpp=i-1; lpv=ppv; #ifdef DEBUG Rprintf("updated peak to x=%d y=%f\n",lpp,lpv); #endif } else { if(ppv>lpv) { // update last peak positions #ifdef DEBUG Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv); #endif lpp=i-1; lpv=ppv; } } } // update previous values if(val!=ppv) { pppv=ppv; ppv=val; } } else { d_nv[i]=val; } } } if(return_peaks) { // record last position if(lpp>0) { #ifdef DEBUG Rprintf("recording last peak x=%d y=%f\n",lpp,lpv); #endif ppos.push_back(lpp); pval.push_back(lpv); } SEXP rpp_R,rpv_R; PROTECT(rpp_R=allocVector(INTSXP,ppos.size())); PROTECT(rpv_R=allocVector(REALSXP,ppos.size())); int* rpp=INTEGER(rpp_R); double* rpv=REAL(rpv_R); for(int i=0;i<ppos.size();i++) { rpp[i]=ppos[i]; rpv[i]=pval[i]; } SEXP ans_R, names_R; PROTECT(names_R = allocVector(STRSXP, 2)); SET_STRING_ELT(names_R, 0, mkChar("x")); SET_STRING_ELT(names_R, 1, mkChar("v")); PROTECT(ans_R = allocVector(VECSXP, 2)); SET_VECTOR_ELT(ans_R, 0, rpp_R); SET_VECTOR_ELT(ans_R, 1, rpv_R); setAttrib(ans_R, R_NamesSymbol, names_R); UNPROTECT(4); return(ans_R); } else { UNPROTECT(1); return(nv); } } /************************************************************************/ /* * wtd - window tag difference implementation */ SEXP wtd(SEXP x_R, // positive strand hist SEXP y_R, // negative strand hist of the same length SEXP wsize_R, // outer boundary distance SEXP return_peaks_R, // whether all correlation values, or just peaks should be returned SEXP min_peak_dist_R, // distance between closest peaks SEXP min_peak_val_R, // min peak threshold SEXP direct_count_R, // whether tag weighting should not be done SEXP tag_weight_R, // tag weight SEXP ignore_masking_R, // whether to ignore masked regions SEXP bg_subtract_R, // a flag whether do background subtractio SEXP bgp_R, // optional background hist for positive strand SEXP bgn_R, // optional background hist for negative strand SEXP bg_wsize_R, // window size for the background counts SEXP bg_weight_R, // optional weighting for the background tags, must compensate for window size difference SEXP round_up_R // whether to round up fractional signal tag counts ) { #ifdef DEBUG Rprintf("start\n"); #endif int whs=INTEGER(wsize_R)[0]; int* x=INTEGER(x_R); int* y=INTEGER(y_R); int n_x=LENGTH(x_R); // background-related int* bgp=INTEGER(bgp_R); int* bgn=INTEGER(bgn_R); int bg_whs=INTEGER(bg_wsize_R)[0]; const int return_peaks=*(INTEGER(return_peaks_R)); const int direct_count=*(INTEGER(direct_count_R)); const int ignore_masking=*(INTEGER(ignore_masking_R)); const double min_peak_val=*(REAL(min_peak_val_R)); const int min_peak_dist=*(INTEGER(min_peak_dist_R)); const double tag_weight=*(REAL(tag_weight_R)); const int round_up=*(INTEGER(round_up_R)); const int bg_subtract=*(INTEGER(bg_subtract_R)); const double bg_weight=*(REAL(bg_weight_R)); int i; // point at which the value is being calculated int start=whs+1; int end=n_x-whs-1; // tag counts to calculate the means int pn1=0; int pn2=0; int nn1=0; int nn2=0; // bg tag counts within bg window int bg_pn1=0; int bg_pn2=0; int bg_nn1=0; int bg_nn2=0; SEXP nv; double *d_nv; vector<int> ppos; vector<double> pval; if(!return_peaks) { PROTECT(nv=allocVector(REALSXP,n_x)); d_nv=REAL(nv); for(int i=0;i<n_x;i++) { d_nv[i]=0; } } #ifdef DEBUG Rprintf("whs=%d start=%d end=%d tag_weight=%f ignore_masing=%d\n", whs, start,end,tag_weight,ignore_masking); Rprintf("x[1]=%d x[2]=%d y[1]=%d y[2]=%d\n",x[1],x[2],y[1],y[2]); #endif int lpp=-1; // last peak position double lpv=-1000; // last peak value double ppv=-1000; // last value int ppl=-1; // position of the last value double pppv=-1000; // value before last if(ignore_masking==1) { for(int i=0;i<whs;i++) { pn1+=x[i]; pn2+=x[i+whs+1]; nn1+=y[i]; nn2+=y[i+whs+1]; } } if(bg_subtract) { // pre-initialize background tag counts, for(int i=0;i<bg_whs;i++) { if(i<n_x) { bg_pn2+=bgp[i]; bg_nn2+=bgn[i]; } } // increment center of background count window to the start position for(int i=0;i<start;i++) { // update background counts int nl=i-bg_whs-1; if(nl>=0) { bg_pn1-=bgp[nl]; bg_nn1-=bgn[nl]; } bg_pn1+=bgp[i]; bg_nn1+=bgn[i]; if(i>0) { bg_pn2-=bgp[i-1]; bg_nn2-=bgn[i-1]; } int nr=i+bg_whs; if(nr<n_x) { bg_pn2+=bgp[nr]; bg_nn2+=bgn[nr]; } } } #ifdef DEBUG Rprintf("initialization: i=%d pn1=%d, pn2=%d, nn1=%d, nn2=%d", i,pn1,pn2,nn1,nn2); #endif for(i=start;i<end;i++) { if(bg_subtract) { // update background counts int nl=i-bg_whs-1; if(nl>=0) { bg_pn1-=bgp[nl]; bg_nn1-=bgn[nl]; } bg_pn1+=bgp[i]; bg_nn1+=bgn[i]; if(i>0) { bg_pn2-=bgp[i-1]; bg_nn2-=bgn[i-1]; } int nr=i+bg_whs; if(nr<n_x) { bg_pn2+=bgp[nr]; bg_nn2+=bgn[nr]; } } // update counts if(ignore_masking==1) { pn1+=x[i-1]-x[i-whs-1]; pn2+=x[i+whs]-x[i-1]; nn1+=y[i-1]-y[i-whs-1]; nn2+=y[i+whs]-y[i-1]; } else { pn1=pn2=nn1=nn2=0; for(int k=0;k<whs;k++) { int xp1=x[i-k-1]; int xp2=x[i+k]; int xn1=y[i-k-1]; int xn2=y[i+k]; // omit masked positions if(xp1!=-1 && xn1!=-1 && xp2!=-1 && xn2!=-1) { pn1+=xp1; nn1+=xn1; pn2+=xp2; nn2+=xn2; } } } double val; double spn1=((double)pn1)*tag_weight; double snn1=((double)nn1)*tag_weight; double spn2=((double)pn2)*tag_weight; double snn2=((double)nn2)*tag_weight; if(round_up) { if(pn1>0 && spn1<1) { spn1=1.0; } //if(pn2>0 && spn2<1) { spn2=1.0; } //if(nn1>0 && snn1<1) { snn1=1.0; } if(nn2>0 && snn2<1) { snn2=1.0; } } if(direct_count) { val=spn1+snn2; if(round_up && val<1) { val=1.0; } if(bg_subtract) { val-=((double) (bg_pn1+bg_nn2))*bg_weight; } } else { if(bg_subtract) { spn1-=((double)bg_pn1)*bg_weight; snn1-=((double)bg_nn1)*bg_weight; spn2-=((double)bg_pn2)*bg_weight; snn2-=((double)bg_nn2)*bg_weight; if(spn2<0) spn2=0; if(snn1<0) snn1=0; if(spn1>0 && snn2>0) { val=(2.0*sqrt(spn1*snn2)-(spn2+snn1+1.0)); } else { val=-(spn2+snn1+1.0); } } else { val=2.0*sqrt(spn1*snn2)-(spn2+snn1+tag_weight); } } //double val=sqrt(pn1*nn2); //if(pn2>nn1) { val-=pn2; } else { val-=pn1; } #ifdef DEBUG Rprintf("update: i=%d pn1=%d pn2=%d nn1=%d nn2=%d val=%f\n",i,pn1,pn2,nn1,nn2,val); #endif if(return_peaks) { // determine if previous position was a peak if(ppv>min_peak_val && ppv>val && ppv>pppv) { if(lpp>0 && (i-lpp+1)>min_peak_dist) { // record previous peak position ppos.push_back(lpp); pval.push_back(lpv); #ifdef DEBUG Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp)); #endif if(ppl!=-1 && ppl!=i-1) { lpp=(int) round((ppl+i-1)/2); } else { lpp=i-1; } lpv=ppv; #ifdef DEBUG Rprintf("updated peak to x=%d y=%f\n",lpp,lpv); #endif } else { if(ppv>lpv) { // update last peak positions #ifdef DEBUG Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv); #endif if(ppl!=-1 && ppl!=i-1) { lpp=(int) round((ppl+i-1)/2); } else { lpp=i-1; } lpv=ppv; } } } // update previous values if(val!=ppv) { pppv=ppv; ppv=val; ppl=i; } } else { d_nv[i]=val; } } if(return_peaks) { // record last position if(lpp>0) { #ifdef DEBUG Rprintf("recording last peak x=%d y=%f\n",lpp,lpv); #endif ppos.push_back(lpp); pval.push_back(lpv); } SEXP rpp_R,rpv_R; PROTECT(rpp_R=allocVector(INTSXP,ppos.size())); PROTECT(rpv_R=allocVector(REALSXP,ppos.size())); int* rpp=INTEGER(rpp_R); double* rpv=REAL(rpv_R); for(int i=0;i<ppos.size();i++) { rpp[i]=ppos[i]; rpv[i]=pval[i]; } SEXP ans_R, names_R; PROTECT(names_R = allocVector(STRSXP, 2)); SET_STRING_ELT(names_R, 0, mkChar("x")); SET_STRING_ELT(names_R, 1, mkChar("v")); PROTECT(ans_R = allocVector(VECSXP, 2)); SET_VECTOR_ELT(ans_R, 0, rpp_R); SET_VECTOR_ELT(ans_R, 1, rpv_R); setAttrib(ans_R, R_NamesSymbol, names_R); UNPROTECT(4); return(ans_R); } else { UNPROTECT(1); return(nv); } } }