31
|
1
|
|
2 #include <map>
|
|
3 #include <iostream>
|
|
4 #include <list>
|
|
5 #include <set>
|
|
6 #include <vector>
|
|
7 #include <sstream>
|
|
8 #include <stdlib.h>
|
|
9 #include <stdio.h>
|
|
10 #include <string.h>
|
|
11
|
|
12 #ifdef __cplusplus
|
|
13 extern "C" {
|
|
14 #endif
|
|
15
|
|
16 using namespace std;
|
|
17
|
|
18 #define MISSING_VAL -99
|
|
19
|
|
20 class segment {
|
|
21 public:
|
|
22 string target;
|
|
23 string chrome;
|
|
24 int start, end;
|
|
25 float value;
|
|
26 };
|
|
27
|
|
28 typedef map<string,list<segment> > chromemap;
|
|
29 typedef map< string, chromemap > segmap;
|
|
30 typedef map<string,set<int> > breakmap;
|
|
31 typedef vector<vector<float> > genemap;
|
|
32
|
|
33
|
|
34 segmap * new_segment() {
|
|
35 return new segmap();
|
|
36 }
|
|
37
|
|
38 set<string> * new_target_set() {
|
|
39 return new set<string>();
|
|
40 }
|
|
41
|
|
42 void add_segment(segmap * seg, set<string> *targetSet, char *sample, char *chrom, int chrom_start, int chrom_end, float value) {
|
|
43 string tmp;
|
|
44 segment a;
|
|
45
|
|
46 a.target = strdup(sample);
|
|
47 a.chrome = strdup(chrom);
|
|
48 a.start = chrom_start;
|
|
49 a.end = chrom_end;
|
|
50 a.value = value;
|
|
51
|
|
52 if ( a.target.size() > 0 ) {
|
|
53 if ( a.chrome.compare("X") == 0 )
|
|
54 a.chrome = "23";
|
|
55 if ( a.chrome.compare("Y") == 0 )
|
|
56 a.chrome = "24";
|
|
57
|
|
58 if ( a.chrome.compare("23") == 0 )
|
|
59 a.chrome = "chrX";
|
|
60 else if ( a.chrome.compare("24") == 0 )
|
|
61 a.chrome = "chrY";
|
|
62 else if ( a.chrome.find("chr") != 0 )
|
|
63 a.chrome = string("chr") + a.chrome;
|
|
64
|
|
65 if(seg->find(a.target) == seg->end()) {
|
|
66 map<string, list<segment> > l;
|
|
67 (*seg)[ a.target ] = l;
|
|
68 }
|
|
69 if ( (*seg)[ a.target ].find( a.chrome ) == (*seg)[ a.target ].end() ) {
|
|
70 list<segment> l;
|
|
71 (*seg)[ a.target ][ a.chrome ] = l;
|
|
72 }
|
|
73 (*seg)[ a.target ][a.chrome].push_back( a );
|
|
74 targetSet->insert( a.target );
|
|
75 }
|
|
76 }
|
|
77
|
|
78
|
|
79
|
54
|
80 void print_matrix(segmap *data, set<string> *targetSet, void (*print)(const char *), char* NORMAL_CNV) {
|
31
|
81
|
|
82 //create a break map
|
|
83 breakmap breaks;
|
|
84 for( segmap::iterator t = data->begin(); t!=data->end(); ++t) {
|
|
85 for ( chromemap::iterator c = t->second.begin(); c != t->second.end(); ++c) {
|
|
86 for ( list<segment>::iterator s = c->second.begin(); s != c->second.end(); s++ ) {
|
|
87 if ( breaks.find( s->chrome ) == breaks.end() ) {
|
|
88 set<int> ns;
|
|
89 ns.insert( s->start );
|
|
90 ns.insert( s->end );
|
|
91 breaks[ s->chrome ] = ns;
|
|
92 } else {
|
|
93 breaks[ s->chrome ].insert( s->start );
|
|
94 breaks[ s->chrome ].insert( s->end );
|
|
95 }
|
|
96 }
|
|
97 }
|
|
98 }
|
|
99
|
|
100 //print out the matrix column names
|
|
101 (*print)( "probe" );
|
|
102 for ( set<string>::iterator ts = targetSet->begin(); ts != targetSet->end(); ++ts ) {
|
|
103 (*print)( "\t" );
|
|
104 (*print)( (*ts).c_str() );
|
|
105 }
|
|
106 (*print)( "\n" );
|
|
107 int targetCount = targetSet->size();
|
|
108 //check breakmap for each chrome
|
|
109 for ( breakmap::iterator b = breaks.begin(); b != breaks.end(); ++b ) {
|
|
110 vector<int> starts;
|
|
111 vector<int> ends;
|
|
112 vector<string> probeName;
|
|
113
|
|
114 breakmap::iterator cset = breaks.find( b->first );
|
|
115 int blockCount = cset->second.size() - 1;
|
|
116 starts.resize( blockCount );
|
|
117 ends.resize( blockCount );
|
|
118 probeName.resize( blockCount );
|
|
119 int i = 0;
|
|
120 for ( set<int>::iterator v = cset->second.begin(); v != cset->second.end(); ++v ) {
|
|
121 if ( i > 0 ) {
|
|
122 ends[i-1] = (*v);
|
|
123 }
|
|
124 if ( i < blockCount ) {
|
|
125 starts[ i ] = (*v);
|
|
126 }
|
|
127 i++;
|
|
128 }
|
|
129
|
|
130 //create names for the breakpoints
|
|
131 genemap gm;
|
|
132 gm.resize( blockCount );
|
|
133 for ( int i = 0; i < blockCount; i++ ) {
|
|
134 stringstream name;
|
|
135 name << b->first;
|
|
136 name << "_";
|
|
137 name << starts[i];
|
|
138 name << "_";
|
|
139 name << ends[i];
|
|
140 probeName[i] = name.str();
|
|
141 gm[ i ].resize( targetCount );
|
|
142 }
|
|
143 //scan the targets
|
|
144 int curTarget = 0;
|
|
145 for ( set<string>::iterator t = targetSet->begin(); t != targetSet->end(); ++t ) {
|
|
146 //find the target values for the current chrome
|
|
147 segmap::iterator s = data->find( (*t) );
|
|
148 chromemap::iterator ss = s->second.find( b->first );
|
|
149 if ( ss != s->second.end() ) {
|
|
150 vector<float> vals;
|
|
151 vals.resize( blockCount );
|
|
152 for ( int i = 0; i < blockCount; i++ ) {
|
|
153 vals[ i ] = MISSING_VAL;
|
|
154 }
|
|
155 //if the segment overlaps the break, assign the value
|
|
156 for ( list<segment>::iterator cs = ss->second.begin(); cs != ss->second.end(); cs++ ) {
|
|
157 for ( int i = 0; i < blockCount; i++ ) {
|
|
158 if ( cs->end > starts[i] && cs->start < ends[i] ) {
|
|
159 vals[i] = cs->value;
|
|
160 }
|
|
161 }
|
|
162 }
|
|
163 //assign the values to the named map
|
|
164 for ( int i = 0; i < blockCount; i++ ) {
|
|
165 gm[ i ][ curTarget ] = vals[ i ];
|
|
166 }
|
|
167 //cout << s->first << "\t" << ss->first << "\n";
|
|
168 }
|
|
169 curTarget++;
|
|
170 }
|
|
171 //print out this chrome's segments
|
|
172 //and the values for each target
|
|
173 for ( int i =0; i < blockCount; i++ ) {
|
|
174 print(probeName[i].c_str());
|
|
175 for ( int j = 0; j < targetCount; j++ ) {
|
|
176 float val = gm[i][j];
|
|
177 print("\t");
|
|
178 if ( val == MISSING_VAL )
|
54
|
179 print(NORMAL_CNV);
|
31
|
180 else {
|
|
181 char str[20] = "";
|
|
182 sprintf(str, "%f", val);
|
|
183 print(str);
|
|
184 }
|
|
185 }
|
|
186 print("\n");
|
|
187 }
|
|
188 }
|
|
189
|
|
190 }
|
|
191
|
|
192
|
|
193 /*
|
|
194 * This program read's the stdin looking for a segment format of
|
|
195 * <targetname> <chrome_number> <chrome_start> <chrome_end>
|
|
196 * breaks the segments into discreate probes (so segments on different targets
|
|
197 * have the same probe name) and creates a matrix of probe values for each target
|
|
198 */
|
|
199
|
|
200
|
|
201 int main(int argc, char** argv) {
|
|
202
|
|
203 segmap data;
|
|
204 set<string> targetSet;
|
|
205
|
|
206 //load in segment map
|
|
207 do {
|
|
208 string tmp;
|
|
209 segment a;
|
|
210 cin >> a.target;
|
|
211 cin >> a.chrome;
|
|
212 cin >> tmp;
|
|
213 a.start = atoi( tmp.c_str() );
|
|
214 cin >> tmp;
|
|
215 a.end = atoi( tmp.c_str() );
|
|
216 cin >> tmp;
|
|
217 a.value = atof( tmp.c_str() );
|
|
218 if ( a.target.size() > 0 ) {
|
|
219 if ( a.chrome.compare("X") == 0 )
|
|
220 a.chrome = "23";
|
|
221 if ( a.chrome.compare("Y") == 0 )
|
|
222 a.chrome = "24";
|
|
223
|
|
224 if ( a.chrome.compare("23") == 0 )
|
|
225 a.chrome = "chrX";
|
|
226 else if ( a.chrome.compare("24") == 0 )
|
|
227 a.chrome = "chrY";
|
|
228 else
|
|
229 a.chrome = string("chr") + a.chrome;
|
|
230
|
|
231 if(data.find(a.target) == data.end()) {
|
|
232 map<string, list<segment> > l;
|
|
233 data[ a.target ] = l;
|
|
234 }
|
|
235 if ( data[ a.target ].find( a.chrome ) == data[ a.target ].end() ) {
|
|
236 list<segment> l;
|
|
237 data[ a.target ][ a.chrome ] = l;
|
|
238 }
|
|
239 data[ a.target ][a.chrome].push_back( a );
|
|
240 targetSet.insert( a.target );
|
|
241 }
|
|
242 } while ( cin );
|
|
243
|
|
244 //create a break map
|
|
245 breakmap breaks;
|
|
246 for( segmap::iterator t = data.begin(); t!=data.end(); ++t) {
|
|
247 for ( chromemap::iterator c = t->second.begin(); c != t->second.end(); ++c) {
|
|
248 for ( list<segment>::iterator s = c->second.begin(); s != c->second.end(); s++ ) {
|
|
249 if ( breaks.find( s->chrome ) == breaks.end() ) {
|
|
250 set<int> ns;
|
|
251 ns.insert( s->start );
|
|
252 ns.insert( s->end );
|
|
253 breaks[ s->chrome ] = ns;
|
|
254 } else {
|
|
255 breaks[ s->chrome ].insert( s->start );
|
|
256 breaks[ s->chrome ].insert( s->end );
|
|
257 }
|
|
258 }
|
|
259 }
|
|
260 }
|
|
261
|
|
262 //print out the matrix column names
|
|
263 cout << "probe";
|
|
264 for ( set<string>::iterator ts = targetSet.begin(); ts != targetSet.end(); ++ts ) {
|
|
265 cout << "\t";
|
|
266 cout << (*ts);
|
|
267 }
|
|
268 cout << "\n";
|
|
269 int targetCount = targetSet.size();
|
|
270 //check breakmap for each chrome
|
|
271 for ( breakmap::iterator b = breaks.begin(); b != breaks.end(); ++b ) {
|
|
272 cerr << b->first << "\n";
|
|
273 vector<int> starts;
|
|
274 vector<int> ends;
|
|
275 vector<string> probeName;
|
|
276
|
|
277 breakmap::iterator cset = breaks.find( b->first );
|
|
278 int blockCount = cset->second.size() - 1;
|
|
279 starts.resize( blockCount );
|
|
280 ends.resize( blockCount );
|
|
281 probeName.resize( blockCount );
|
|
282 int i = 0;
|
|
283 for ( set<int>::iterator v = cset->second.begin(); v != cset->second.end(); ++v ) {
|
|
284 if ( i > 0 ) {
|
|
285 ends[i-1] = (*v);
|
|
286 }
|
|
287 if ( i < blockCount ) {
|
|
288 starts[ i ] = (*v);
|
|
289 }
|
|
290 i++;
|
|
291 }
|
|
292
|
|
293 //create names for the breakpoints
|
|
294 genemap gm;
|
|
295 gm.resize( blockCount );
|
|
296 for ( int i = 0; i < blockCount; i++ ) {
|
|
297 stringstream name;
|
|
298 name << b->first;
|
|
299 name << "_";
|
|
300 name << starts[i];
|
|
301 name << "_";
|
|
302 name << ends[i];
|
|
303 probeName[i] = name.str();
|
|
304 gm[ i ].resize( targetCount );
|
|
305 }
|
|
306 //scan the targets
|
|
307 int curTarget = 0;
|
|
308 for ( set<string>::iterator t = targetSet.begin(); t != targetSet.end(); ++t ) {
|
|
309 //find the target values for the current chrome
|
|
310 segmap::iterator s = data.find( (*t) );
|
|
311 chromemap::iterator ss = s->second.find( b->first );
|
|
312 if ( ss != s->second.end() ) {
|
|
313 vector<float> vals;
|
|
314 vals.resize( blockCount );
|
|
315 for ( int i = 0; i < blockCount; i++ ) {
|
|
316 vals[ i ] = MISSING_VAL;
|
|
317 }
|
|
318 //if the segment overlaps the break, assign the value
|
|
319 for ( list<segment>::iterator cs = ss->second.begin(); cs != ss->second.end(); cs++ ) {
|
|
320 for ( int i = 0; i < blockCount; i++ ) {
|
|
321 if ( cs->end > starts[i] && cs->start < ends[i] ) {
|
|
322 vals[i] = cs->value;
|
|
323 }
|
|
324 }
|
|
325 }
|
|
326 //assign the values to the named map
|
|
327 for ( int i = 0; i < blockCount; i++ ) {
|
|
328 gm[ i ][ curTarget ] = vals[ i ];
|
|
329 }
|
|
330 //cout << s->first << "\t" << ss->first << "\n";
|
|
331 }
|
|
332 curTarget++;
|
|
333 }
|
|
334 //print out this chrome's segments
|
|
335 //and the values for each target
|
|
336 for ( int i =0; i < blockCount; i++ ) {
|
|
337 cout << probeName[i];
|
|
338 for ( int j = 0; j < targetCount; j++ ) {
|
|
339 float val = gm[i][j];
|
|
340 cout << "\t";
|
|
341 if ( val == MISSING_VAL )
|
|
342 cout << "null";
|
|
343 else
|
|
344 cout << val;
|
|
345 }
|
|
346 cout << "\n";
|
|
347 }
|
|
348 }
|
|
349
|
|
350 return 0;
|
|
351 }
|
|
352
|
|
353 #ifdef __cplusplus
|
|
354 }
|
|
355 #endif
|