Mercurial > repos > rico > quote_test
comparison dpmix_plot.py @ 0:939c20789501 default tip
Uploaded
| author | rico |
|---|---|
| date | Fri, 06 Apr 2012 10:51:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:939c20789501 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import os | |
| 4 import sys | |
| 5 import math | |
| 6 import matplotlib as mpl | |
| 7 mpl.use('PDF') | |
| 8 import matplotlib.pyplot as plt | |
| 9 from matplotlib.path import Path | |
| 10 import matplotlib.patches as patches | |
| 11 | |
| 12 ################################################################################ | |
| 13 | |
| 14 def build_chrom_len_dict(dbkey, galaxy_data_index_dir): | |
| 15 chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom') | |
| 16 chrom_len_file = '{0}.len'.format(dbkey) | |
| 17 chrom_len_path = os.path.join(chrom_len_root, chrom_len_file) | |
| 18 | |
| 19 chrom_len = {} | |
| 20 | |
| 21 try: | |
| 22 with open(chrom_len_path) as fh: | |
| 23 for line in fh: | |
| 24 line = line.rstrip('\r\n') | |
| 25 elems = line.split() | |
| 26 if len(elems) == 2: | |
| 27 chrom = elems[0] | |
| 28 length = int(elems[1]) | |
| 29 chrom_len[chrom] = length | |
| 30 except: | |
| 31 pass | |
| 32 | |
| 33 return chrom_len | |
| 34 | |
| 35 def parse_input_file(input_file): | |
| 36 chroms = [] | |
| 37 individuals = [] | |
| 38 data = {} | |
| 39 chrom_len = {} | |
| 40 | |
| 41 with open(input_file) as fh: | |
| 42 for line in fh: | |
| 43 line = line.strip() | |
| 44 if line: | |
| 45 elems = line.split() | |
| 46 chrom = elems[0] | |
| 47 p1, p2, state = map(int, elems[1:4]) | |
| 48 id = elems[4] | |
| 49 | |
| 50 if chrom not in chroms: | |
| 51 chroms.append(chrom) | |
| 52 | |
| 53 if id not in individuals: | |
| 54 individuals.append(id) | |
| 55 | |
| 56 data.setdefault(chrom, {}) | |
| 57 data[chrom].setdefault(id, []) | |
| 58 data[chrom][id].append((p1, p2, state)) | |
| 59 | |
| 60 if p2 > chrom_len.setdefault(chrom, 0): | |
| 61 chrom_len[chrom] = p2 | |
| 62 | |
| 63 return chroms, individuals, data, chrom_len | |
| 64 | |
| 65 def check_chroms(chroms, chrom_len, dbkey): | |
| 66 error = 0 | |
| 67 for chrom in chroms: | |
| 68 if chrom not in chrom_len: | |
| 69 print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom) | |
| 70 error = 1 | |
| 71 if error: | |
| 72 sys.exit(1) | |
| 73 | |
| 74 def check_data(data, chrom_len, dbkey): | |
| 75 error = 0 | |
| 76 for chrom in data: | |
| 77 chrom_beg = 0 | |
| 78 chrom_end = chrom_len[chrom] | |
| 79 for individual in data[chrom]: | |
| 80 for p1, p2, state in data[chrom][individual]: | |
| 81 if p1 >= p2: | |
| 82 print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3}".format(chrom, p1, p2, state, individual) | |
| 83 error = 1 | |
| 84 if p1 < chrom_beg or p2 > chrom_end: | |
| 85 print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual) | |
| 86 error = 1 | |
| 87 if error: | |
| 88 sys.exit(1) | |
| 89 | |
| 90 def make_rectangle(p1, p2, color, bottom=0.0, top=1.0): | |
| 91 verts = [ | |
| 92 (p1, bottom), # left, bottom | |
| 93 (p1, top), # left, top | |
| 94 (p2, top), # right, top | |
| 95 (p2, bottom), # right, bottom | |
| 96 (0.0, 0.0) # ignored | |
| 97 ] | |
| 98 | |
| 99 codes = [ | |
| 100 Path.MOVETO, | |
| 101 Path.LINETO, | |
| 102 Path.LINETO, | |
| 103 Path.LINETO, | |
| 104 Path.CLOSEPOLY | |
| 105 ] | |
| 106 | |
| 107 path = Path(verts, codes) | |
| 108 return patches.PathPatch(path, facecolor=color, lw=0) | |
| 109 | |
| 110 def make_split_rectangle(p1, p2, top_color, bottom_color): | |
| 111 patch1 = make_rectangle(p1, p2, bottom_color, top=0.5) | |
| 112 patch2 = make_rectangle(p1, p2, top_color, bottom=0.5) | |
| 113 return [patch1, patch2] | |
| 114 | |
| 115 def make_state_rectangle(p1, p2, state, chrom, individual): | |
| 116 if state == 0: | |
| 117 return [ make_rectangle(p1, p2, 'r') ] | |
| 118 elif state == 1: | |
| 119 return make_split_rectangle(p1, p2, 'r', 'g') | |
| 120 elif state == 2: | |
| 121 return [ make_rectangle(p1, p2, 'g') ] | |
| 122 else: | |
| 123 print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4}".format(state, chrom, p1, p2, state, individual) | |
| 124 sys.exit(1) | |
| 125 | |
| 126 def nicenum(num, round=False): | |
| 127 if num == 0: | |
| 128 return 0.0 | |
| 129 | |
| 130 exp = int(math.floor(math.log10(num))) | |
| 131 f = num / math.pow(10, exp) | |
| 132 | |
| 133 if round: | |
| 134 if f < 1.5: | |
| 135 nf = 1.0 | |
| 136 elif f < 3.0: | |
| 137 nf = 2.0 | |
| 138 elif f < 7.0: | |
| 139 nf = 5.0 | |
| 140 else: | |
| 141 nf = 10.0 | |
| 142 else: | |
| 143 if f <= 1.0: | |
| 144 nf = 1.0 | |
| 145 elif f <= 2.0: | |
| 146 nf = 2.0 | |
| 147 elif f <= 5.0: | |
| 148 nf = 5.0 | |
| 149 else: | |
| 150 nf = 10.0 | |
| 151 | |
| 152 return nf * pow(10, exp) | |
| 153 | |
| 154 def tick_foo(beg, end, loose=False): | |
| 155 ntick = 10 | |
| 156 | |
| 157 range = nicenum(end - beg, round=False) | |
| 158 d = nicenum(range/(ntick - 1), round=True) | |
| 159 digits = int(math.floor(math.log10(d))) | |
| 160 | |
| 161 if loose: | |
| 162 graph_min = math.floor(beg/d) * d | |
| 163 graph_max = math.ceil(end/d) * d | |
| 164 else: | |
| 165 graph_min = beg | |
| 166 graph_max = end | |
| 167 | |
| 168 nfrac = max([-1 * digits, 0]) | |
| 169 vals = [] | |
| 170 | |
| 171 stop = graph_max | |
| 172 if loose: | |
| 173 stop = graph_max + (0.5 * d) | |
| 174 | |
| 175 x = graph_min | |
| 176 while x <= stop: | |
| 177 vals.append(int(x)) | |
| 178 x += d | |
| 179 | |
| 180 vals = vals[1:] | |
| 181 | |
| 182 # if not loose: | |
| 183 # if vals[-1] < graph_max: | |
| 184 # vals.append(int(graph_max)) | |
| 185 | |
| 186 labels = [] | |
| 187 for val in vals: | |
| 188 labels.append('{0}'.format(int(val/math.pow(10, digits)))) | |
| 189 | |
| 190 # labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits))) | |
| 191 | |
| 192 return vals, labels | |
| 193 | |
| 194 ################################################################################ | |
| 195 | |
| 196 def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir): | |
| 197 fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir) | |
| 198 chroms, individuals, data, chrom_len = parse_input_file(input_file) | |
| 199 | |
| 200 for chrom in chrom_len.keys(): | |
| 201 if chrom in fs_chrom_len: | |
| 202 chrom_len[chrom] = fs_chrom_len[chrom] | |
| 203 | |
| 204 #check_chroms(chroms, chrom_len, input_dbkey) | |
| 205 check_data(data, chrom_len, input_dbkey) | |
| 206 | |
| 207 ## units below are inches | |
| 208 top_space = 0.10 | |
| 209 chrom_space = 0.25 | |
| 210 chrom_height = 0.25 | |
| 211 ind_space = 0.10 | |
| 212 ind_height = 0.25 | |
| 213 | |
| 214 total_height = 0.0 | |
| 215 at_top = True | |
| 216 for chrom in chroms: | |
| 217 if at_top: | |
| 218 total_height += (top_space + chrom_height) | |
| 219 at_top = False | |
| 220 else: | |
| 221 total_height += (top_space + chrom_space + chrom_height) | |
| 222 | |
| 223 individual_count = 0 | |
| 224 for individual in individuals: | |
| 225 if individual in data[chrom]: | |
| 226 individual_count += 1 | |
| 227 total_height += individual_count * (ind_space + ind_height) | |
| 228 | |
| 229 width = 7.5 | |
| 230 height = math.ceil(total_height) | |
| 231 | |
| 232 bottom = 1.0 | |
| 233 | |
| 234 fig = plt.figure(figsize=(width, height)) | |
| 235 | |
| 236 at_top = True | |
| 237 for_webb = False | |
| 238 | |
| 239 for chrom in chroms: | |
| 240 length = chrom_len[chrom] | |
| 241 vals, labels = tick_foo(0, length) | |
| 242 | |
| 243 if at_top: | |
| 244 bottom -= (top_space + chrom_height)/height | |
| 245 at_top = False | |
| 246 else: | |
| 247 bottom -= (top_space + chrom_space + chrom_height)/height | |
| 248 | |
| 249 if not for_webb: | |
| 250 ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/height]) | |
| 251 plt.axis('off') | |
| 252 plt.text(0.5, 0.5, chrom, fontsize=14, ha='center') | |
| 253 | |
| 254 individual_count = 0 | |
| 255 for individual in individuals: | |
| 256 if individual in data[chrom]: | |
| 257 individual_count += 1 | |
| 258 | |
| 259 i = 0 | |
| 260 for individual in individuals: | |
| 261 if individual in data[chrom]: | |
| 262 i += 1 | |
| 263 | |
| 264 bottom -= (ind_space + ind_height)/height | |
| 265 if not for_webb: | |
| 266 # [left, bottom, width, height] | |
| 267 ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/height]) | |
| 268 plt.axis('off') | |
| 269 plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center') | |
| 270 # [left, bottom, width, height] | |
| 271 ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False) | |
| 272 ax2.set_xlim(0, length) | |
| 273 ax2.set_ylim(0, 1) | |
| 274 if i != individual_count: | |
| 275 plt.axis('off') | |
| 276 else: | |
| 277 if not for_webb: | |
| 278 ax2.tick_params(top=False, left=False, right=False, labelleft=False) | |
| 279 ax2.set_xticks(vals) | |
| 280 ax2.set_xticklabels(labels) | |
| 281 else: | |
| 282 plt.axis('off') | |
| 283 for p1, p2, state in sorted(data[chrom][individual]): | |
| 284 for patch in make_state_rectangle(p1, p2, state, chrom, individual): | |
| 285 ax2.add_patch(patch) | |
| 286 | |
| 287 plt.savefig(output_file) | |
| 288 | |
| 289 ################################################################################ | |
| 290 | |
| 291 if __name__ == '__main__': | |
| 292 input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5] | |
| 293 make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir) | |
| 294 sys.exit(0) | |
| 295 |
