annotate seg2matrix/CGData/CsegToMatrix.cc @ 52:3a036a34c362

better handle of input file
author jingchunzhu
date Thu, 17 Sep 2015 15:00:45 -0700
parents ab20c0d04f4a
children 59dbe857f5d4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
1
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
2 #include <map>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
3 #include <iostream>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
4 #include <list>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
5 #include <set>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
6 #include <vector>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
7 #include <sstream>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
8 #include <stdlib.h>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
9 #include <stdio.h>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
10 #include <string.h>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
11
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
12 #ifdef __cplusplus
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
13 extern "C" {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
14 #endif
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
15
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
16 using namespace std;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
17
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
18 #define MISSING_VAL -99
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
19
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
/*
 * One input segment: a value attached to an interval of a chromosome
 * for a single target (sample).
 */
class segment {
public:
    // Start from a defined state: without this, start/end/value hold
    // indeterminate values until every field has been assigned.
    segment() : start(0), end(0), value(0.0f) {}

    std::string target;   // target (sample) name the segment belongs to
    std::string chrome;   // chromosome name
    int start, end;       // interval bounds; both are used as breakpoints
    float value;          // value reported for every probe the segment overlaps
};
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
27
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
28 typedef map<string,list<segment> > chromemap;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
29 typedef map< string, chromemap > segmap;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
30 typedef map<string,set<int> > breakmap;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
31 typedef vector<vector<float> > genemap;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
32
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
33
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
34 segmap * new_segment() {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
35 return new segmap();
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
36 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
37
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
/*
 * Allocate an empty set of target names on the heap and hand it to the
 * caller. Nothing in this file deletes the returned object.
 */
std::set<std::string> * new_target_set() {
    std::set<std::string> *fresh = new std::set<std::string>;
    return fresh;
}
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
41
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
42 void add_segment(segmap * seg, set<string> *targetSet, char *sample, char *chrom, int chrom_start, int chrom_end, float value) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
43 string tmp;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
44 segment a;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
45
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
46 a.target = strdup(sample);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
47 a.chrome = strdup(chrom);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
48 a.start = chrom_start;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
49 a.end = chrom_end;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
50 a.value = value;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
51
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
52 if ( a.target.size() > 0 ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
53 if ( a.chrome.compare("X") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
54 a.chrome = "23";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
55 if ( a.chrome.compare("Y") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
56 a.chrome = "24";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
57
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
58 if ( a.chrome.compare("23") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
59 a.chrome = "chrX";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
60 else if ( a.chrome.compare("24") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
61 a.chrome = "chrY";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
62 else if ( a.chrome.find("chr") != 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
63 a.chrome = string("chr") + a.chrome;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
64
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
65 if(seg->find(a.target) == seg->end()) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
66 map<string, list<segment> > l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
67 (*seg)[ a.target ] = l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
68 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
69 if ( (*seg)[ a.target ].find( a.chrome ) == (*seg)[ a.target ].end() ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
70 list<segment> l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
71 (*seg)[ a.target ][ a.chrome ] = l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
72 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
73 (*seg)[ a.target ][a.chrome].push_back( a );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
74 targetSet->insert( a.target );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
75 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
76 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
77
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
78
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
79
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
80 void print_matrix(segmap *data, set<string> *targetSet, void (*print)(const char *)) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
81
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
82 //create a break map
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
83 breakmap breaks;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
84 for( segmap::iterator t = data->begin(); t!=data->end(); ++t) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
85 for ( chromemap::iterator c = t->second.begin(); c != t->second.end(); ++c) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
86 for ( list<segment>::iterator s = c->second.begin(); s != c->second.end(); s++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
87 if ( breaks.find( s->chrome ) == breaks.end() ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
88 set<int> ns;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
89 ns.insert( s->start );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
90 ns.insert( s->end );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
91 breaks[ s->chrome ] = ns;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
92 } else {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
93 breaks[ s->chrome ].insert( s->start );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
94 breaks[ s->chrome ].insert( s->end );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
95 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
96 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
97 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
98 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
99
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
100 //print out the matrix column names
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
101 (*print)( "probe" );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
102 for ( set<string>::iterator ts = targetSet->begin(); ts != targetSet->end(); ++ts ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
103 (*print)( "\t" );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
104 (*print)( (*ts).c_str() );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
105 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
106 (*print)( "\n" );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
107 int targetCount = targetSet->size();
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
108 //check breakmap for each chrome
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
109 for ( breakmap::iterator b = breaks.begin(); b != breaks.end(); ++b ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
110 vector<int> starts;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
111 vector<int> ends;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
112 vector<string> probeName;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
113
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
114 breakmap::iterator cset = breaks.find( b->first );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
115 int blockCount = cset->second.size() - 1;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
116 starts.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
117 ends.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
118 probeName.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
119 int i = 0;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
120 for ( set<int>::iterator v = cset->second.begin(); v != cset->second.end(); ++v ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
121 if ( i > 0 ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
122 ends[i-1] = (*v);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
123 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
124 if ( i < blockCount ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
125 starts[ i ] = (*v);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
126 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
127 i++;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
128 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
129
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
130 //create names for the breakpoints
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
131 genemap gm;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
132 gm.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
133 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
134 stringstream name;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
135 name << b->first;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
136 name << "_";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
137 name << starts[i];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
138 name << "_";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
139 name << ends[i];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
140 probeName[i] = name.str();
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
141 gm[ i ].resize( targetCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
142 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
143 //scan the targets
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
144 int curTarget = 0;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
145 for ( set<string>::iterator t = targetSet->begin(); t != targetSet->end(); ++t ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
146 //find the target values for the current chrome
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
147 segmap::iterator s = data->find( (*t) );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
148 chromemap::iterator ss = s->second.find( b->first );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
149 if ( ss != s->second.end() ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
150 vector<float> vals;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
151 vals.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
152 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
153 vals[ i ] = MISSING_VAL;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
154 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
155 //if the segment overlaps the break, assign the value
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
156 for ( list<segment>::iterator cs = ss->second.begin(); cs != ss->second.end(); cs++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
157 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
158 if ( cs->end > starts[i] && cs->start < ends[i] ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
159 vals[i] = cs->value;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
160 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
161 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
162 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
163 //assign the values to the named map
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
164 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
165 gm[ i ][ curTarget ] = vals[ i ];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
166 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
167 //cout << s->first << "\t" << ss->first << "\n";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
168 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
169 curTarget++;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
170 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
171 //print out this chrome's segments
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
172 //and the values for each target
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
173 for ( int i =0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
174 print(probeName[i].c_str());
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
175 for ( int j = 0; j < targetCount; j++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
176 float val = gm[i][j];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
177 print("\t");
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
178 if ( val == MISSING_VAL )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
179 print("NA");
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
180 else {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
181 char str[20] = "";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
182 sprintf(str, "%f", val);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
183 print(str);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
184 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
185 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
186 print("\n");
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
187 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
188 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
189
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
190 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
191
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
192
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
193 /*
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
194 * This program reads stdin looking for a segment format of
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
195 * <targetname> <chrome_number> <chrome_start> <chrome_end> <value>
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
196 * breaks the segments into discrete probes (so segments on different targets
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
197 * have the same probe name) and creates a matrix of probe values for each target
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
198 */
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
199
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
200
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
201 int main(int argc, char** argv) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
202
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
203 segmap data;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
204 set<string> targetSet;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
205
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
206 //load in segment map
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
207 do {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
208 string tmp;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
209 segment a;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
210 cin >> a.target;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
211 cin >> a.chrome;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
212 cin >> tmp;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
213 a.start = atoi( tmp.c_str() );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
214 cin >> tmp;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
215 a.end = atoi( tmp.c_str() );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
216 cin >> tmp;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
217 a.value = atof( tmp.c_str() );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
218 if ( a.target.size() > 0 ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
219 if ( a.chrome.compare("X") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
220 a.chrome = "23";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
221 if ( a.chrome.compare("Y") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
222 a.chrome = "24";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
223
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
224 if ( a.chrome.compare("23") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
225 a.chrome = "chrX";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
226 else if ( a.chrome.compare("24") == 0 )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
227 a.chrome = "chrY";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
228 else
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
229 a.chrome = string("chr") + a.chrome;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
230
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
231 if(data.find(a.target) == data.end()) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
232 map<string, list<segment> > l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
233 data[ a.target ] = l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
234 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
235 if ( data[ a.target ].find( a.chrome ) == data[ a.target ].end() ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
236 list<segment> l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
237 data[ a.target ][ a.chrome ] = l;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
238 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
239 data[ a.target ][a.chrome].push_back( a );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
240 targetSet.insert( a.target );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
241 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
242 } while ( cin );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
243
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
244 //create a break map
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
245 breakmap breaks;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
246 for( segmap::iterator t = data.begin(); t!=data.end(); ++t) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
247 for ( chromemap::iterator c = t->second.begin(); c != t->second.end(); ++c) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
248 for ( list<segment>::iterator s = c->second.begin(); s != c->second.end(); s++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
249 if ( breaks.find( s->chrome ) == breaks.end() ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
250 set<int> ns;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
251 ns.insert( s->start );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
252 ns.insert( s->end );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
253 breaks[ s->chrome ] = ns;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
254 } else {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
255 breaks[ s->chrome ].insert( s->start );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
256 breaks[ s->chrome ].insert( s->end );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
257 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
258 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
259 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
260 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
261
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
262 //print out the matrix column names
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
263 cout << "probe";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
264 for ( set<string>::iterator ts = targetSet.begin(); ts != targetSet.end(); ++ts ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
265 cout << "\t";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
266 cout << (*ts);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
267 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
268 cout << "\n";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
269 int targetCount = targetSet.size();
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
270 //check breakmap for each chrome
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
271 for ( breakmap::iterator b = breaks.begin(); b != breaks.end(); ++b ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
272 cerr << b->first << "\n";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
273 vector<int> starts;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
274 vector<int> ends;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
275 vector<string> probeName;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
276
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
277 breakmap::iterator cset = breaks.find( b->first );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
278 int blockCount = cset->second.size() - 1;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
279 starts.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
280 ends.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
281 probeName.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
282 int i = 0;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
283 for ( set<int>::iterator v = cset->second.begin(); v != cset->second.end(); ++v ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
284 if ( i > 0 ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
285 ends[i-1] = (*v);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
286 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
287 if ( i < blockCount ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
288 starts[ i ] = (*v);
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
289 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
290 i++;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
291 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
292
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
293 //create names for the breakpoints
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
294 genemap gm;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
295 gm.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
296 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
297 stringstream name;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
298 name << b->first;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
299 name << "_";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
300 name << starts[i];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
301 name << "_";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
302 name << ends[i];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
303 probeName[i] = name.str();
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
304 gm[ i ].resize( targetCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
305 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
306 //scan the targets
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
307 int curTarget = 0;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
308 for ( set<string>::iterator t = targetSet.begin(); t != targetSet.end(); ++t ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
309 //find the target values for the current chrome
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
310 segmap::iterator s = data.find( (*t) );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
311 chromemap::iterator ss = s->second.find( b->first );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
312 if ( ss != s->second.end() ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
313 vector<float> vals;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
314 vals.resize( blockCount );
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
315 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
316 vals[ i ] = MISSING_VAL;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
317 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
318 //if the segment overlaps the break, assign the value
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
319 for ( list<segment>::iterator cs = ss->second.begin(); cs != ss->second.end(); cs++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
320 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
321 if ( cs->end > starts[i] && cs->start < ends[i] ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
322 vals[i] = cs->value;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
323 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
324 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
325 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
326 //assign the values to the named map
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
327 for ( int i = 0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
328 gm[ i ][ curTarget ] = vals[ i ];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
329 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
330 //cout << s->first << "\t" << ss->first << "\n";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
331 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
332 curTarget++;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
333 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
334 //print out this chrome's segments
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
335 //and the values for each target
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
336 for ( int i =0; i < blockCount; i++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
337 cout << probeName[i];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
338 for ( int j = 0; j < targetCount; j++ ) {
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
339 float val = gm[i][j];
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
340 cout << "\t";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
341 if ( val == MISSING_VAL )
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
342 cout << "null";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
343 else
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
344 cout << val;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
345 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
346 cout << "\n";
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
347 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
348 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
349
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
350 return 0;
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
351 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
352
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
353 #ifdef __cplusplus
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
354 }
ab20c0d04f4a add seg2matrix tool
jingchunzhu
parents:
diff changeset
355 #endif