Mercurial > repos > zzhou > spp_phantompeak
comparison spp/src/maqread.cpp @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:608a8e0eac56 | 6:ce08b0efa3fd |
---|---|
1 #include "pc.h" | |
2 #include <vector> | |
3 #include <string.h> | |
4 #include <iostream> | |
5 #include <fstream> | |
6 #include <sstream> | |
7 #include <strstream> | |
8 #include <algorithm> | |
9 #include <string> | |
10 #include <functional> | |
11 #include <utility> | |
12 #include <zlib.h> | |
13 | |
14 extern "C" { | |
15 #include "R.h" | |
16 #include "Rmath.h" | |
17 #include "Rinternals.h" | |
18 #include "Rdefines.h" | |
19 #include "maqmap.h" | |
20 } | |
21 | |
22 using namespace std; | |
23 using namespace __gnu_cxx; | |
24 | |
25 | |
26 class lessAbsoluteValue { | |
27 public: | |
28 bool operator()(int a, int b) const { | |
29 return abs(a) < abs(b); | |
30 } | |
31 }; | |
32 | |
33 | |
34 | |
35 //#define DEBUG 1 | |
36 | |
37 extern "C" { | |
38 | |
39 // read in text version of maq map | |
40 SEXP read_binmaqmap(SEXP filename,SEXP read_tag_names_R) { | |
41 | |
42 #ifdef DEBUG | |
43 Rprintf("start\n"); | |
44 #endif | |
45 const char* fname=CHAR(asChar(filename)); | |
46 int read_names=*(INTEGER(read_tag_names_R)); | |
47 #ifdef DEBUG | |
48 Rprintf("fname=%s\n",fname); | |
49 #endif | |
50 | |
51 // main data vector | |
52 // chr - pos | |
53 vector< vector<int> > pos; | |
54 vector< vector<int> > posnm; // number of mismatches | |
55 vector< vector<string> > tagnames; | |
56 | |
57 // chromosome map | |
58 hash_map<string, int, hash<string>,equal_to<string> > cind_map; | |
59 vector<string> cnames; | |
60 | |
61 | |
62 gzFile f=gzopen(fname,"r"); | |
63 | |
64 maqmap_t *m = maqmap_read_header(f); | |
65 maqmap1_t *m1, mm1; | |
66 m1 = &mm1; | |
67 | |
68 if (!f) { | |
69 cout<<"can't open input file \""<<fname<<"\"\n"; | |
70 } else { | |
71 Rprintf("opened %s\n",fname); | |
72 | |
73 // read in bed line | |
74 string line; | |
75 int fcount=0; | |
76 while(maqmap_read1(f, m1)) { | |
77 string tagname=string(m1->name); | |
78 string chr=string(m->ref_name[m1->seqid]); | |
79 int len=m1->size; | |
80 int fpos=(m1->pos>>1) + 1; | |
81 if(m1->pos&1) { | |
82 fpos=-1*(fpos+len-1); | |
83 } | |
84 int nm=m1->info1&0xf; | |
85 | |
86 #ifdef DEBUG | |
87 Rprintf("read in map line chr=%s tagname=%s fpos=%d, nm=%d, len=%d\n",chr.c_str(),tagname.c_str(),fpos,nm,len); | |
88 #endif | |
89 | |
90 | |
91 // determine the chromosome index | |
92 hash_map<string, int, hash<string>,equal_to<string> >::const_iterator li=cind_map.find(chr); | |
93 int cind=-1; | |
94 if(li==cind_map.end()) { | |
95 // register new chromosome | |
96 cind=cnames.size(); | |
97 cnames.push_back(chr); | |
98 cind_map[chr]=cind; | |
99 // allocate new pos vector | |
100 pos.push_back(vector<int>()); | |
101 posnm.push_back(vector<int>()); | |
102 if(read_names) { | |
103 tagnames.push_back(vector<string>()); | |
104 } | |
105 #ifdef DEBUG | |
106 Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); | |
107 #endif | |
108 } else { | |
109 cind=li->second; | |
110 } | |
111 fcount++; | |
112 (pos[cind]).push_back(fpos); | |
113 (posnm[cind]).push_back(nm); | |
114 if(read_names) { | |
115 (tagnames[cind]).push_back(tagname); | |
116 } | |
117 #ifdef DEBUG | |
118 Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); | |
119 if(fcount>30) { | |
120 break; | |
121 } | |
122 #endif | |
123 | |
124 } | |
125 gzclose(f); | |
126 Rprintf("done. read %d fragments\n",fcount); | |
127 } | |
128 | |
129 | |
130 // construct output structures | |
131 SEXP chnames; | |
132 int np=0; // number of protections | |
133 PROTECT(chnames = allocVector(STRSXP, cnames.size())); | |
134 for(vector<string>::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { | |
135 SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); | |
136 } | |
137 np++; | |
138 | |
139 // sort | |
140 //for(vector<vector<int> >::iterator csi=pos.begin();csi!=pos.end();++csi) { | |
141 // sort(csi->begin(), csi->end(), lessAbsoluteValue()); | |
142 //} | |
143 | |
144 SEXP ans; | |
145 PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; | |
146 vector<vector<int> >::const_iterator nsi; | |
147 vector<vector<string> >::const_iterator ssi; | |
148 for(vector<vector<int> >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { | |
149 nsi=posnm.begin()+(csi-pos.begin()); | |
150 | |
151 SEXP dv,dnames_R; | |
152 PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++; | |
153 SET_STRING_ELT(dnames_R, 0, mkChar("t")); | |
154 SET_STRING_ELT(dnames_R, 1, mkChar("n")); | |
155 if(read_names) { | |
156 SET_STRING_ELT(dnames_R, 2, mkChar("s")); | |
157 } | |
158 | |
159 | |
160 | |
161 SEXP tv,nv,sv; | |
162 PROTECT(tv=allocVector(INTSXP,csi->size())); np++; | |
163 PROTECT(nv=allocVector(INTSXP,csi->size())); np++; | |
164 if(read_names) { | |
165 PROTECT(sv=allocVector(STRSXP,csi->size())); np++; | |
166 } | |
167 int* i_tv=INTEGER(tv); | |
168 int* i_nv=INTEGER(nv); | |
169 | |
170 int i=0; | |
171 vector<int>::const_iterator ini=nsi->begin(); | |
172 for(vector<int> ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { | |
173 i_tv[i]=*pi; | |
174 i_nv[i]=*ini++; | |
175 i++; | |
176 } | |
177 if(read_names) { | |
178 int i=0; | |
179 ssi=tagnames.begin()+(csi-pos.begin()); | |
180 for(vector<string>::const_iterator si=ssi->begin();si!=ssi->end();++si) { | |
181 SET_STRING_ELT(sv,i,mkChar(si->c_str())); | |
182 i++; | |
183 } | |
184 } | |
185 PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++; | |
186 SET_VECTOR_ELT(dv, 0, tv); | |
187 SET_VECTOR_ELT(dv, 1, nv); | |
188 if(read_names) { | |
189 SET_VECTOR_ELT(dv, 2, sv); | |
190 } | |
191 setAttrib(dv, R_NamesSymbol, dnames_R); | |
192 | |
193 SET_VECTOR_ELT(ans, csi-pos.begin(), dv); | |
194 } | |
195 | |
196 setAttrib(ans,R_NamesSymbol,chnames); | |
197 | |
198 #ifdef DEBUG | |
199 Rprintf("unprotecting %d elements\n",np); | |
200 #endif | |
201 | |
202 UNPROTECT(np); | |
203 return(ans); | |
204 } | |
205 | |
206 | |
207 } |