6
|
1 #include "pc.h"
|
|
2 #include <vector>
|
|
3 #include <string.h>
|
|
4 #include <iostream>
|
|
5 #include <fstream>
|
|
6 #include <sstream>
|
|
7 #include <strstream>
|
|
8 #include <algorithm>
|
|
9 #include <string>
|
|
10 #include <functional>
|
|
11 #include <utility>
|
|
12 #include <zlib.h>
|
|
13
|
|
14 extern "C" {
|
|
15 #include "R.h"
|
|
16 #include "Rmath.h"
|
|
17 #include "Rinternals.h"
|
|
18 #include "Rdefines.h"
|
|
19 #include "maqmap.h"
|
|
20 }
|
|
21
|
|
22 using namespace std;
|
|
23 using namespace __gnu_cxx;
|
|
24
|
|
25
|
|
26 class lessAbsoluteValue {
|
|
27 public:
|
|
28 bool operator()(int a, int b) const {
|
|
29 return abs(a) < abs(b);
|
|
30 }
|
|
31 };
|
|
32
|
|
33
|
|
34
|
|
35 //#define DEBUG 1
|
|
36
|
|
37 extern "C" {
|
|
38
|
|
39 // read in text version of maq map
|
|
40 SEXP read_binmaqmap(SEXP filename,SEXP read_tag_names_R) {
|
|
41
|
|
42 #ifdef DEBUG
|
|
43 Rprintf("start\n");
|
|
44 #endif
|
|
45 const char* fname=CHAR(asChar(filename));
|
|
46 int read_names=*(INTEGER(read_tag_names_R));
|
|
47 #ifdef DEBUG
|
|
48 Rprintf("fname=%s\n",fname);
|
|
49 #endif
|
|
50
|
|
51 // main data vector
|
|
52 // chr - pos
|
|
53 vector< vector<int> > pos;
|
|
54 vector< vector<int> > posnm; // number of mismatches
|
|
55 vector< vector<string> > tagnames;
|
|
56
|
|
57 // chromosome map
|
|
58 hash_map<string, int, hash<string>,equal_to<string> > cind_map;
|
|
59 vector<string> cnames;
|
|
60
|
|
61
|
|
62 gzFile f=gzopen(fname,"r");
|
|
63
|
|
64 maqmap_t *m = maqmap_read_header(f);
|
|
65 maqmap1_t *m1, mm1;
|
|
66 m1 = &mm1;
|
|
67
|
|
68 if (!f) {
|
|
69 cout<<"can't open input file \""<<fname<<"\"\n";
|
|
70 } else {
|
|
71 Rprintf("opened %s\n",fname);
|
|
72
|
|
73 // read in bed line
|
|
74 string line;
|
|
75 int fcount=0;
|
|
76 while(maqmap_read1(f, m1)) {
|
|
77 string tagname=string(m1->name);
|
|
78 string chr=string(m->ref_name[m1->seqid]);
|
|
79 int len=m1->size;
|
|
80 int fpos=(m1->pos>>1) + 1;
|
|
81 if(m1->pos&1) {
|
|
82 fpos=-1*(fpos+len-1);
|
|
83 }
|
|
84 int nm=m1->info1&0xf;
|
|
85
|
|
86 #ifdef DEBUG
|
|
87 Rprintf("read in map line chr=%s tagname=%s fpos=%d, nm=%d, len=%d\n",chr.c_str(),tagname.c_str(),fpos,nm,len);
|
|
88 #endif
|
|
89
|
|
90
|
|
91 // determine the chromosome index
|
|
92 hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr);
|
|
93 int cind=-1;
|
|
94 if(li==cind_map.end()) {
|
|
95 // register new chromosome
|
|
96 cind=cnames.size();
|
|
97 cnames.push_back(chr);
|
|
98 cind_map[chr]=cind;
|
|
99 // allocate new pos vector
|
|
100 pos.push_back(vector<int>());
|
|
101 posnm.push_back(vector<int>());
|
|
102 if(read_names) {
|
|
103 tagnames.push_back(vector<string>());
|
|
104 }
|
|
105 #ifdef DEBUG
|
|
106 Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size());
|
|
107 #endif
|
|
108 } else {
|
|
109 cind=li->second;
|
|
110 }
|
|
111 fcount++;
|
|
112 (pos[cind]).push_back(fpos);
|
|
113 (posnm[cind]).push_back(nm);
|
|
114 if(read_names) {
|
|
115 (tagnames[cind]).push_back(tagname);
|
|
116 }
|
|
117 #ifdef DEBUG
|
|
118 Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len);
|
|
119 if(fcount>30) {
|
|
120 break;
|
|
121 }
|
|
122 #endif
|
|
123
|
|
124 }
|
|
125 gzclose(f);
|
|
126 Rprintf("done. read %d fragments\n",fcount);
|
|
127 }
|
|
128
|
|
129
|
|
130 // construct output structures
|
|
131 SEXP chnames;
|
|
132 int np=0; // number of protections
|
|
133 PROTECT(chnames = allocVector(STRSXP, cnames.size()));
|
|
134 for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) {
|
|
135 SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str()));
|
|
136 }
|
|
137 np++;
|
|
138
|
|
139 // sort
|
|
140 //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) {
|
|
141 // sort(csi->begin(), csi->end(), lessAbsoluteValue());
|
|
142 //}
|
|
143
|
|
144 SEXP ans;
|
|
145 PROTECT(ans = allocVector(VECSXP, cnames.size())); np++;
|
|
146 vector<vector<int> >::const_iterator nsi;
|
|
147 vector<vector<string> >::const_iterator ssi;
|
|
148 for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) {
|
|
149 nsi=posnm.begin()+(csi-pos.begin());
|
|
150
|
|
151 SEXP dv,dnames_R;
|
|
152 PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++;
|
|
153 SET_STRING_ELT(dnames_R, 0, mkChar("t"));
|
|
154 SET_STRING_ELT(dnames_R, 1, mkChar("n"));
|
|
155 if(read_names) {
|
|
156 SET_STRING_ELT(dnames_R, 2, mkChar("s"));
|
|
157 }
|
|
158
|
|
159
|
|
160
|
|
161 SEXP tv,nv,sv;
|
|
162 PROTECT(tv=allocVector(INTSXP,csi->size())); np++;
|
|
163 PROTECT(nv=allocVector(INTSXP,csi->size())); np++;
|
|
164 if(read_names) {
|
|
165 PROTECT(sv=allocVector(STRSXP,csi->size())); np++;
|
|
166 }
|
|
167 int* i_tv=INTEGER(tv);
|
|
168 int* i_nv=INTEGER(nv);
|
|
169
|
|
170 int i=0;
|
|
171 vector<int>::const_iterator ini=nsi->begin();
|
|
172 for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) {
|
|
173 i_tv[i]=*pi;
|
|
174 i_nv[i]=*ini++;
|
|
175 i++;
|
|
176 }
|
|
177 if(read_names) {
|
|
178 int i=0;
|
|
179 ssi=tagnames.begin()+(csi-pos.begin());
|
|
180 for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) {
|
|
181 SET_STRING_ELT(sv,i,mkChar(si->c_str()));
|
|
182 i++;
|
|
183 }
|
|
184 }
|
|
185 PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++;
|
|
186 SET_VECTOR_ELT(dv, 0, tv);
|
|
187 SET_VECTOR_ELT(dv, 1, nv);
|
|
188 if(read_names) {
|
|
189 SET_VECTOR_ELT(dv, 2, sv);
|
|
190 }
|
|
191 setAttrib(dv, R_NamesSymbol, dnames_R);
|
|
192
|
|
193 SET_VECTOR_ELT(ans, csi-pos.begin(), dv);
|
|
194 }
|
|
195
|
|
196 setAttrib(ans,R_NamesSymbol,chnames);
|
|
197
|
|
198 #ifdef DEBUG
|
|
199 Rprintf("unprotecting %d elements\n",np);
|
|
200 #endif
|
|
201
|
|
202 UNPROTECT(np);
|
|
203 return(ans);
|
|
204 }
|
|
205
|
|
206
|
|
207 }
|