Mercurial > repos > mzeidler > virana_main
comparison vhom_regionplot.py @ 43:c74e70b459dd draft
Uploaded
author | mzeidler |
---|---|
date | Mon, 04 Nov 2013 11:54:58 -0500 |
parents | d5a3b07f91c4 |
children |
comparison
equal
deleted
inserted
replaced
42:310843ad5112 | 43:c74e70b459dd |
---|---|
17 import shutil | 17 import shutil |
18 | 18 |
19 import sys | 19 import sys |
20 | 20 |
21 import colorsys | 21 import colorsys |
22 | |
23 from numpy import arange | |
24 | |
25 import copy | |
22 | 26 |
23 | 27 |
24 | 28 |
25 | 29 |
26 rcParams['figure.figsize'] = (6, 6) | 30 rcParams['figure.figsize'] = (6, 6) |
30 rcParams['patch.edgecolor'] = 'white' | 34 rcParams['patch.edgecolor'] = 'white' |
31 rcParams['patch.linewidth']=0.5 | 35 rcParams['patch.linewidth']=0.5 |
32 rcParams['patch.facecolor'] = 'black' | 36 rcParams['patch.facecolor'] = 'black' |
33 rcParams['font.family'] = 'StixGeneral' | 37 rcParams['font.family'] = 'StixGeneral' |
34 | 38 |
35 | 39 |
36 def color_variants(r,g,b): | 40 def rgb_color_variants(stat_dict): |
37 h,l2,s= colorsys.rgb_to_hls(r,g,b) | 41 |
38 l=[l2-0.4,l2-0.1,l2+0.2] | 42 n_samples=0 |
39 hexs=[] | 43 sample_names=[] |
40 for i in l: | 44 |
41 hexs.append(colors.rgb2hex(colorsys.hls_to_rgb(h,i,s))) | 45 for fam,obj in stat_dict.iteritems(): |
42 return hexs | 46 if len(obj.samples)>n_samples: |
43 | 47 n_samples=len(obj.samples) |
44 def rgb_color_variants(r,g,b): | 48 for sample in obj.samples.iterkeys(): |
45 h,l2,s= colorsys.rgb_to_hls(r,g,b) | 49 if sample not in sample_names: |
46 l=[l2-0.4,l2-0.1,l2+0.2] | 50 sample_names.append(sample) |
51 | |
52 | |
53 assert n_samples == len(sample_names) #iterate over samples -> color_map | |
47 rgbs={} | 54 rgbs={} |
48 rgbs["pathogen"]=colorsys.hls_to_rgb(h,l[0],s) | 55 i=1 |
49 rgbs["ambiguous"]=colorsys.hls_to_rgb(h,l[1],s) | 56 for sample in sample_names: |
50 rgbs["human"]=colorsys.hls_to_rgb(h,l[2],s) | 57 rgbs[sample]={} |
58 h=1.*(i/n_samples) | |
59 h=float(h)/2. | |
60 l=[0.1,0.4,0.7] | |
61 rgbs[sample]["pathogen"]=colorsys.hls_to_rgb(h,l[0],1.0) | |
62 rgbs[sample]["ambiguous"]=colorsys.hls_to_rgb(h,l[1],1.0) | |
63 rgbs[sample]["human"]=colorsys.hls_to_rgb(h,l[2],1.0) | |
64 | |
65 i+=1 | |
51 return rgbs | 66 return rgbs |
52 | 67 def convert_color_map(color_map): |
53 | 68 deep = copy.deepcopy(color_map) |
69 for sample in deep: | |
70 for state in deep[sample]: | |
71 deep[sample][state] = colors.rgb2hex(deep[sample][state]) | |
72 return deep | |
73 | |
74 def means(start,stop): | |
75 list = arange(start,stop) | |
76 return (float(sum(list))/float(len(list))) | |
54 | 77 |
55 | 78 |
56 def remove_border(axes=None, top=False, right=False, left=True, bottom=True): | 79 def remove_border(axes=None, top=False, right=False, left=True, bottom=True): |
57 """ | 80 """ |
58 Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks | 81 Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks |
80 ax.yaxis.tick_right() | 103 ax.yaxis.tick_right() |
81 | 104 |
82 | 105 |
83 def generateHTML(stat_dict,file,directory): | 106 def generateHTML(stat_dict,file,directory): |
84 rval=["<html><head><title>Homologous groups and regions derived from hit files </title></head><body><p/>\n"] | 107 rval=["<html><head><title>Homologous groups and regions derived from hit files </title></head><body><p/>\n"] |
85 rval.append('<link rel="stylesheet" type="text/css" href="/static/style/base.css">') | 108 #rval.append('<link rel="stylesheet" type="text/css" href="/static/style/base.css">') |
86 rval.append("<a href='plot.pdf'>Download Plot</a>") | 109 rval.append("<a href='plot.pdf'>Download Plot</a>") |
87 #sven create <div> here for some text | 110 rval.append("<div id='graph' class='graph'></div>") |
88 n_samples=0 | |
89 for sample in stat_dict.iterkeys(): | |
90 n_samples+=1 | |
91 rval.append("<div id=%s_graph></div><p/>"%(sample)) | |
92 curr_dir = os.path.dirname(os.path.realpath(__file__)) | 111 curr_dir = os.path.dirname(os.path.realpath(__file__)) |
93 shutil.copy(os.path.join(curr_dir,"jquery-1.10.2.min.js"),directory) | 112 shutil.copy(os.path.join(curr_dir,"jquery-1.10.2.min.js"),directory) |
94 shutil.copy(os.path.join(curr_dir,"jqBarGraph.2.1.js"),directory) | 113 shutil.copy(os.path.join(curr_dir,"jqBarGraph.2.1.js"),directory) |
95 rval.append("<script type='text/javascript' src='jquery-1.10.2.min.js'></script>") | 114 rval.append("<script type='text/javascript' src='jquery-1.10.2.min.js'></script>") |
96 rval.append("<script type='text/javascript' src='jqBarGraph.2.1.js'></script>") | 115 rval.append("<script type='text/javascript' src='jqBarGraph.2.1.js'></script>") |
97 rval.append('<script>') | 116 rval.append('<script>') |
98 i=0 | 117 rval.append("$('#graph').jqBarGraph({") |
99 for sample in stat_dict.iterkeys(): | 118 rval.append("data: %s,"%stat_dict) |
100 i+=1 | 119 |
101 rval.append("%s_array = new Array(" %sample) | 120 color_map=convert_color_map(rgb_color_variants(stat_dict)) |
102 for family in stat_dict[sample].iterkeys(): | 121 |
103 values=[] | 122 rval.append("colors: %s," %color_map) |
104 for region,data in stat_dict[sample][family].iteritems(): | 123 rval.append("legend: true,") |
105 values.append([int(data[0][1]),int(data[0][2]), data[0][0], data[0][3],int(data[0][4]),int(region)]) | 124 rval.append("tab: 'reads',") |
106 rval.append(str([values,family])+",") | 125 rval.append("title: '<H3>Visualisation of Data</H3>',") |
107 rval[-1]=rval[-1][:-1] | 126 rval.append("width: %d"%((len(stat_dict)*50)+150)) |
108 rval.append(");") | 127 rval.append("});") |
109 rval.append("%s_files=%s;"%(sample,str(getFiles(directory)))) | |
110 rval.append("$('#%s_graph').jqBarGraph({"%sample) | |
111 rval.append("sample: '%s',"%sample) | |
112 rval.append("data: %s_array,"%sample) | |
113 rval.append("files: %s_files,"%sample) | |
114 r,g,b,a=cm.hsv(1.*(1-(i/n_samples))) | |
115 rval.append("colors: %s," %color_variants(r,g,b)) | |
116 rval.append("legend: true,") | |
117 rval.append("tab: 'reads',") | |
118 rval.append("title: '<H3>Visualisation of Sample %s</H3>',"%sample) | |
119 rval.append("width: %d"%((len(stat_dict[sample])*50)+150)) | |
120 rval.append("});") | |
121 | 128 |
122 rval.append("</script>") | 129 rval.append("</script>") |
123 | 130 |
124 rval.append( '</body></html>' ) | 131 rval.append( '</body></html>' ) |
125 with open(file,'w') as file: | 132 with open(file,'w') as file: |
126 file.write("\n".join( rval )) | 133 file.write("\n".join( rval )) |
127 def transformDict(stat_dict): | |
128 dict={} | |
129 position={} | |
130 dict_label={} | |
131 for sample in stat_dict.iterkeys(): | |
132 dict[sample]=[{},{}] | |
133 position[sample]={} | |
134 dict_label[sample]={} | |
135 j=0 | |
136 for family in stat_dict[sample].iterkeys(): | |
137 | |
138 i=0 | |
139 for region,values in stat_dict[sample][family].iteritems(): | |
140 | |
141 if i not in dict[sample][0]: | |
142 dict[sample][0][i]=[] | |
143 dict[sample][1][i]=[] | |
144 position[sample][i]=[] | |
145 dict_label[sample][i]=[] | |
146 dict[sample][0][i].append(int(values[0][1])) | |
147 dict[sample][1][i].append(int(values[0][2])) | |
148 position[sample][i].append(j) | |
149 dict_label[sample][i].append(values[0][0]) | |
150 i+=1 | |
151 j+=1 | |
152 return dict,position,dict_label | |
153 | 134 |
154 def customeLegend(color_map): | 135 def customeLegend(color_map): |
155 legend_map=[[],[]] | 136 legend_map=[[],[]] |
156 for label in color_map: | 137 for sample in color_map: |
157 box=Rectangle((0,0),1,1,color=color_map[label],fill=True) | 138 legend_map[0].append(Rectangle((0,0),0,0,visible=False)) |
158 legend_map[0].append(box) | 139 legend_map[1].append(sample) |
159 legend_map[1].append(label) | 140 for label in color_map[sample]: |
160 plt.figlegend(legend_map[0],legend_map[1],loc=8,ncol=len(color_map)) | 141 box=Rectangle((0,0),1,1,color=color_map[sample][label],fill=True) |
142 legend_map[0].append(box) | |
143 legend_map[1].append(label) | |
144 plt.figlegend(legend_map[0],legend_map[1],loc=9,ncol=len(color_map),prop={'size':8}) | |
161 | 145 |
162 | 146 |
163 | 147 |
164 | 148 |
165 def generatePyPlot(stat_dict,output_file): | 149 def generatePyPlot(stat_dict,output_file): |
166 pp = PdfPages(output_file) | 150 pp = PdfPages(output_file) |
167 fig = plt.figure() | 151 |
168 | 152 color_map=rgb_color_variants(stat_dict) |
169 n_samples=len(stat_dict) | 153 n_samples=len(color_map) |
170 dict,position,dict_label=transformDict(stat_dict) | 154 for plot in ["reads","bp"]: |
155 fig=plt.figure() | |
156 i=0 | |
157 for f,fam in stat_dict.iteritems(): | |
158 j=0 | |
159 for s,sample in fam.samples.iteritems(): | |
160 values = [] | |
161 color = [] | |
162 for r,region in sample.regions.iteritems(): | |
163 values.append(int(getattr(region,plot))) | |
164 color.append(color_map[s][region.state]) | |
165 if(len(values)>1): | |
166 b = values[:-1] | |
167 b.insert(0,0); | |
168 for u in range(1,len(b)): | |
169 b[u]+=b[u-1] | |
170 plt.bar([i+j]*len(values),values,bottom=b,width=0.8,color=color) | |
171 else: | |
172 plt.bar([i+j]*len(values),values,width=0.8,color=color) | |
171 | 173 |
172 i=0 | 174 j+=1 |
173 for sample in dict.iterkeys(): | 175 i+= 1+n_samples |
174 fig = plt.figure() | 176 pos=[means(x,x+n_samples)+0.4 for x in arange(0,i+n_samples,1+n_samples)] |
175 | 177 plt.xticks(pos, stat_dict.keys(), rotation='vertical') |
176 fig.subplots_adjust(bottom=0.25,hspace=0.5) | 178 if(plot=="reads"): |
177 | |
178 r,g,b,a=cm.hsv(1.*(1-(i/n_samples))) | |
179 color_map=rgb_color_variants(r,g,b) | |
180 | |
181 for sub in [0,1]: | |
182 plt.subplot(1,2,sub+1) | |
183 bottom={} | |
184 for level,values in dict[sample][sub].iteritems(): | |
185 colors=[color_map[r] for r in dict_label[sample][level]] | |
186 | |
187 if level==0: | |
188 | |
189 plt.bar(position[sample][level],values,bottom=0, width=0.8 ,color=colors) | |
190 else: | |
191 lastvalues=[] | |
192 for oldpos in range(len(values)): | |
193 lastvalues.append(bottom[position[sample][level][oldpos]]) | |
194 | |
195 plt.bar(position[sample][level],values, width=0.8,bottom=lastvalues ,color=colors) | |
196 for pos in range(len(values)): | |
197 if position[sample][level][pos] not in bottom: | |
198 bottom[position[sample][level][pos]]=values[pos] | |
199 else: | |
200 bottom[position[sample][level][pos]]+=values[pos] | |
201 pos=[x+0.4 for x in range(len(stat_dict[sample]))] | |
202 plt.xticks(pos, stat_dict[sample].keys(), rotation='vertical') | |
203 if(sub==0): | |
204 plt.ylabel("Cumulative reads assigned to family") | 179 plt.ylabel("Cumulative reads assigned to family") |
205 else: | 180 if(plot=="bp"): |
206 plt.ylabel("Cumulative basepairs assigned to family") | 181 plt.ylabel("Cumulative basepairs assigned to family") |
207 plt.xlabel("") | 182 plt.xlabel("") |
208 fig.subplots_adjust(bottom=0.25,wspace=0.5) | 183 fig.subplots_adjust(bottom=0.25,wspace=0.5,top=0.8) |
209 remove_border() | 184 remove_border() |
210 | 185 |
211 customeLegend(color_map) | 186 customeLegend(color_map) |
212 plt.suptitle(sample) | |
213 pp.savefig(fig) | 187 pp.savefig(fig) |
214 i+=1 | |
215 pp.close() | 188 pp.close() |
216 | 189 |
217 | 190 |
218 | 191 |
219 def parseStatFile(filename): | 192 def parseStatFile(filename, directory): |
220 | 193 |
221 with open(filename,'r') as file: | 194 with open(filename,'r') as file: |
222 values=[ map(str,line.split('\t')) for line in file ] | 195 values=[ map(str,line.split('\t')) for line in file ] |
223 | 196 |
197 family_dict={} | |
198 for line in values[1:]: | |
199 fam_type,fam,region,sample,state,reads,length,bp,coverage=line | |
200 if fam not in family_dict: | |
201 family= Family(fam,fam_type,directory=directory) | |
202 family_dict[fam]=family | |
203 family_dict[fam].add(sample,region,state,reads,bp,length) | |
204 | |
205 return family_dict | |
206 | |
207 class Family: | |
208 | |
209 def __init__(self,name,typ,directory=None,samples=None): | |
210 self.name=name | |
211 if samples!=None: | |
212 self.samples=samples | |
213 else: | |
214 self.samples={} | |
215 self.directory=directory | |
216 self.typ=typ | |
217 if self.directory: | |
218 self.files=self.__getFiles(directory) | |
219 else: | |
220 self.files=None | |
221 | |
222 def add(self,sample,region,state,reads,bp,length): | |
223 if sample not in self.samples: | |
224 s = Sample(sample) | |
225 self.samples[sample]=s | |
226 | |
227 return self.samples[sample].add(region,state,reads,bp,self.files[region],length) | |
228 | |
229 | |
230 def __getFiles(self,directory): | |
231 dlist = os.listdir(os.path.join(directory,self.typ+"_"+self.name)) | |
224 dict={} | 232 dict={} |
225 for line in values[1:]: | 233 for file in dlist: |
226 if line[3] not in dict: | 234 try: |
227 dict[line[3]]={} | 235 wc,region,suf = file.split("_") |
228 dict[line[3]][line[1]]={} | 236 except ValueError: |
229 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | 237 continue |
230 else: | 238 if region not in dict: |
231 if line[1] not in dict[line[3]]: | 239 dict[region]={} |
232 dict[line[3]][line[1]]={} | 240 dict[region][suf]=file #bit redundant?! just save TRUE or FALSE? |
233 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | |
234 else: | |
235 if line[2] not in dict[line[3]][line[1]]: | |
236 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | |
237 else: | |
238 dict[line[3]][line[1]][line[2]].append([line[4],line[5],line[7],line[0],line[6]]) | |
239 # for key in dict.iterkeys(): | |
240 # for family_key,values in dict[key].iteritems(): | |
241 # for line in values: | |
242 # print key,family_key,line | |
243 return dict | 241 return dict |
244 def getFiles(directory): | 242 |
245 rval={} | 243 def __str__(self): |
246 dlist = os.listdir(directory) | 244 return "{'type':'%s', 'samples':%s}"%(self.typ,str(self.samples)) |
247 for dire in dlist: | 245 |
248 if(os.path.isdir(os.path.join(directory,dire))): | 246 def __repr__(self): |
249 rval[dire]={} | 247 return "{'type':'%s', 'samples':%s}"%(self.typ,str(self.samples)) |
250 flist = os.listdir(os.path.join(directory,dire)) | 248 |
251 for file in flist: | 249 class Sample: |
252 split=file.split("_") | 250 |
253 region=split[1] | 251 def __init__(self,name,regions=None): |
254 if region not in rval[dire]: | 252 self.name=name |
255 rval[dire][region]=[file] | 253 if regions!=None: |
256 else: | 254 self.regions=regions |
257 rval[dire][region].append(file) | 255 else: |
258 | 256 self.regions={} |
259 return rval | 257 def add(self,region,state,reads,bp,files,length): |
260 | 258 if region not in self.regions: |
261 | 259 r = Region(region,state,reads,bp,files,length) |
260 self.regions[region]=r | |
261 return True | |
262 else: | |
263 return False | |
264 | |
265 def __str__(self): | |
266 return str(self.regions) | |
267 | |
268 def __repr__(self): | |
269 return str(self.regions) | |
270 | |
271 | |
272 class Region: | |
273 | |
274 def __init__(self,id,state,reads,bp,files,length): | |
275 self.id=id | |
276 self.state=state | |
277 self.reads=reads | |
278 self.bp=bp | |
279 self.files=files | |
280 self.length=length | |
281 | |
282 def __str__(self): | |
283 return "{'state':'%s', 'reads':%s, 'bp':%s, 'files':%s, 'length':%s}" %(self.state,self.reads,self.bp,self.files,self.length) | |
284 | |
285 def __repr__(self): | |
286 return "{'state':'%s', 'reads':%s, 'bp':%s, 'files':%s, 'length':%s}" %(self.state,self.reads,self.bp,self.files,self.length) | |
287 | |
262 | 288 |
263 if __name__ == "__main__": | 289 if __name__ == "__main__": |
264 | 290 |
265 | 291 |
266 parser = ArgumentParser() | 292 parser = ArgumentParser() |
272 | 298 |
273 (options,args)= parser.parse_known_args() | 299 (options,args)= parser.parse_known_args() |
274 | 300 |
275 args.insert(0,"dummy") | 301 args.insert(0,"dummy") |
276 try: | 302 try: |
277 RegionRunner.run(argv=args) | 303 sys.exit(1)#RegionRunner.run(argv=args) |
278 except SystemExit: | 304 except SystemExit: |
279 stat_dict=parseStatFile(options.stat_file) | 305 stat_dict=parseStatFile(options.stat_file,options.directory) |
306 | |
280 generatePyPlot(stat_dict,os.path.join(options.directory,"plot.pdf")) | 307 generatePyPlot(stat_dict,os.path.join(options.directory,"plot.pdf")) |
281 generateHTML(stat_dict,options.html_file,options.directory) | 308 generateHTML(stat_dict,options.html_file,options.directory) |
282 | 309 |
283 | 310 |
284 | 311 |