comparison vhom_regionplot.py @ 43:c74e70b459dd draft

Uploaded
author mzeidler
date Mon, 04 Nov 2013 11:54:58 -0500
parents d5a3b07f91c4
children
comparison
equal deleted inserted replaced
42:310843ad5112 43:c74e70b459dd
17 import shutil 17 import shutil
18 18
19 import sys 19 import sys
20 20
21 import colorsys 21 import colorsys
22
23 from numpy import arange
24
25 import copy
22 26
23 27
24 28
25 29
26 rcParams['figure.figsize'] = (6, 6) 30 rcParams['figure.figsize'] = (6, 6)
30 rcParams['patch.edgecolor'] = 'white' 34 rcParams['patch.edgecolor'] = 'white'
31 rcParams['patch.linewidth']=0.5 35 rcParams['patch.linewidth']=0.5
32 rcParams['patch.facecolor'] = 'black' 36 rcParams['patch.facecolor'] = 'black'
33 rcParams['font.family'] = 'StixGeneral' 37 rcParams['font.family'] = 'StixGeneral'
34 38
35 39
36 def color_variants(r,g,b): 40 def rgb_color_variants(stat_dict):
37 h,l2,s= colorsys.rgb_to_hls(r,g,b) 41
38 l=[l2-0.4,l2-0.1,l2+0.2] 42 n_samples=0
39 hexs=[] 43 sample_names=[]
40 for i in l: 44
41 hexs.append(colors.rgb2hex(colorsys.hls_to_rgb(h,i,s))) 45 for fam,obj in stat_dict.iteritems():
42 return hexs 46 if len(obj.samples)>n_samples:
43 47 n_samples=len(obj.samples)
44 def rgb_color_variants(r,g,b): 48 for sample in obj.samples.iterkeys():
45 h,l2,s= colorsys.rgb_to_hls(r,g,b) 49 if sample not in sample_names:
46 l=[l2-0.4,l2-0.1,l2+0.2] 50 sample_names.append(sample)
51
52
53 assert n_samples == len(sample_names) #iterate over samples -> color_map
47 rgbs={} 54 rgbs={}
48 rgbs["pathogen"]=colorsys.hls_to_rgb(h,l[0],s) 55 i=1
49 rgbs["ambiguous"]=colorsys.hls_to_rgb(h,l[1],s) 56 for sample in sample_names:
50 rgbs["human"]=colorsys.hls_to_rgb(h,l[2],s) 57 rgbs[sample]={}
58 h=1.*(i/n_samples)
59 h=float(h)/2.
60 l=[0.1,0.4,0.7]
61 rgbs[sample]["pathogen"]=colorsys.hls_to_rgb(h,l[0],1.0)
62 rgbs[sample]["ambiguous"]=colorsys.hls_to_rgb(h,l[1],1.0)
63 rgbs[sample]["human"]=colorsys.hls_to_rgb(h,l[2],1.0)
64
65 i+=1
51 return rgbs 66 return rgbs
52 67 def convert_color_map(color_map):
53 68 deep = copy.deepcopy(color_map)
69 for sample in deep:
70 for state in deep[sample]:
71 deep[sample][state] = colors.rgb2hex(deep[sample][state])
72 return deep
73
74 def means(start,stop):
75 list = arange(start,stop)
76 return (float(sum(list))/float(len(list)))
54 77
55 78
56 def remove_border(axes=None, top=False, right=False, left=True, bottom=True): 79 def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
57 """ 80 """
58 Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks 81 Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks
80 ax.yaxis.tick_right() 103 ax.yaxis.tick_right()
81 104
82 105
83 def generateHTML(stat_dict,file,directory): 106 def generateHTML(stat_dict,file,directory):
84 rval=["<html><head><title>Homologous groups and regions derived from hit files </title></head><body><p/>\n"] 107 rval=["<html><head><title>Homologous groups and regions derived from hit files </title></head><body><p/>\n"]
85 rval.append('<link rel="stylesheet" type="text/css" href="/static/style/base.css">') 108 #rval.append('<link rel="stylesheet" type="text/css" href="/static/style/base.css">')
86 rval.append("<a href='plot.pdf'>Download Plot</a>") 109 rval.append("<a href='plot.pdf'>Download Plot</a>")
87 #sven create <div> here for some text 110 rval.append("<div id='graph' class='graph'></div>")
88 n_samples=0
89 for sample in stat_dict.iterkeys():
90 n_samples+=1
91 rval.append("<div id=%s_graph></div><p/>"%(sample))
92 curr_dir = os.path.dirname(os.path.realpath(__file__)) 111 curr_dir = os.path.dirname(os.path.realpath(__file__))
93 shutil.copy(os.path.join(curr_dir,"jquery-1.10.2.min.js"),directory) 112 shutil.copy(os.path.join(curr_dir,"jquery-1.10.2.min.js"),directory)
94 shutil.copy(os.path.join(curr_dir,"jqBarGraph.2.1.js"),directory) 113 shutil.copy(os.path.join(curr_dir,"jqBarGraph.2.1.js"),directory)
95 rval.append("<script type='text/javascript' src='jquery-1.10.2.min.js'></script>") 114 rval.append("<script type='text/javascript' src='jquery-1.10.2.min.js'></script>")
96 rval.append("<script type='text/javascript' src='jqBarGraph.2.1.js'></script>") 115 rval.append("<script type='text/javascript' src='jqBarGraph.2.1.js'></script>")
97 rval.append('<script>') 116 rval.append('<script>')
98 i=0 117 rval.append("$('#graph').jqBarGraph({")
99 for sample in stat_dict.iterkeys(): 118 rval.append("data: %s,"%stat_dict)
100 i+=1 119
101 rval.append("%s_array = new Array(" %sample) 120 color_map=convert_color_map(rgb_color_variants(stat_dict))
102 for family in stat_dict[sample].iterkeys(): 121
103 values=[] 122 rval.append("colors: %s," %color_map)
104 for region,data in stat_dict[sample][family].iteritems(): 123 rval.append("legend: true,")
105 values.append([int(data[0][1]),int(data[0][2]), data[0][0], data[0][3],int(data[0][4]),int(region)]) 124 rval.append("tab: 'reads',")
106 rval.append(str([values,family])+",") 125 rval.append("title: '<H3>Visualisation of Data</H3>',")
107 rval[-1]=rval[-1][:-1] 126 rval.append("width: %d"%((len(stat_dict)*50)+150))
108 rval.append(");") 127 rval.append("});")
109 rval.append("%s_files=%s;"%(sample,str(getFiles(directory))))
110 rval.append("$('#%s_graph').jqBarGraph({"%sample)
111 rval.append("sample: '%s',"%sample)
112 rval.append("data: %s_array,"%sample)
113 rval.append("files: %s_files,"%sample)
114 r,g,b,a=cm.hsv(1.*(1-(i/n_samples)))
115 rval.append("colors: %s," %color_variants(r,g,b))
116 rval.append("legend: true,")
117 rval.append("tab: 'reads',")
118 rval.append("title: '<H3>Visualisation of Sample %s</H3>',"%sample)
119 rval.append("width: %d"%((len(stat_dict[sample])*50)+150))
120 rval.append("});")
121 128
122 rval.append("</script>") 129 rval.append("</script>")
123 130
124 rval.append( '</body></html>' ) 131 rval.append( '</body></html>' )
125 with open(file,'w') as file: 132 with open(file,'w') as file:
126 file.write("\n".join( rval )) 133 file.write("\n".join( rval ))
127 def transformDict(stat_dict):
128 dict={}
129 position={}
130 dict_label={}
131 for sample in stat_dict.iterkeys():
132 dict[sample]=[{},{}]
133 position[sample]={}
134 dict_label[sample]={}
135 j=0
136 for family in stat_dict[sample].iterkeys():
137
138 i=0
139 for region,values in stat_dict[sample][family].iteritems():
140
141 if i not in dict[sample][0]:
142 dict[sample][0][i]=[]
143 dict[sample][1][i]=[]
144 position[sample][i]=[]
145 dict_label[sample][i]=[]
146 dict[sample][0][i].append(int(values[0][1]))
147 dict[sample][1][i].append(int(values[0][2]))
148 position[sample][i].append(j)
149 dict_label[sample][i].append(values[0][0])
150 i+=1
151 j+=1
152 return dict,position,dict_label
153 134
154 def customeLegend(color_map): 135 def customeLegend(color_map):
155 legend_map=[[],[]] 136 legend_map=[[],[]]
156 for label in color_map: 137 for sample in color_map:
157 box=Rectangle((0,0),1,1,color=color_map[label],fill=True) 138 legend_map[0].append(Rectangle((0,0),0,0,visible=False))
158 legend_map[0].append(box) 139 legend_map[1].append(sample)
159 legend_map[1].append(label) 140 for label in color_map[sample]:
160 plt.figlegend(legend_map[0],legend_map[1],loc=8,ncol=len(color_map)) 141 box=Rectangle((0,0),1,1,color=color_map[sample][label],fill=True)
142 legend_map[0].append(box)
143 legend_map[1].append(label)
144 plt.figlegend(legend_map[0],legend_map[1],loc=9,ncol=len(color_map),prop={'size':8})
161 145
162 146
163 147
164 148
165 def generatePyPlot(stat_dict,output_file): 149 def generatePyPlot(stat_dict,output_file):
166 pp = PdfPages(output_file) 150 pp = PdfPages(output_file)
167 fig = plt.figure() 151
168 152 color_map=rgb_color_variants(stat_dict)
169 n_samples=len(stat_dict) 153 n_samples=len(color_map)
170 dict,position,dict_label=transformDict(stat_dict) 154 for plot in ["reads","bp"]:
155 fig=plt.figure()
156 i=0
157 for f,fam in stat_dict.iteritems():
158 j=0
159 for s,sample in fam.samples.iteritems():
160 values = []
161 color = []
162 for r,region in sample.regions.iteritems():
163 values.append(int(getattr(region,plot)))
164 color.append(color_map[s][region.state])
165 if(len(values)>1):
166 b = values[:-1]
167 b.insert(0,0);
168 for u in range(1,len(b)):
169 b[u]+=b[u-1]
170 plt.bar([i+j]*len(values),values,bottom=b,width=0.8,color=color)
171 else:
172 plt.bar([i+j]*len(values),values,width=0.8,color=color)
171 173
172 i=0 174 j+=1
173 for sample in dict.iterkeys(): 175 i+= 1+n_samples
174 fig = plt.figure() 176 pos=[means(x,x+n_samples)+0.4 for x in arange(0,i+n_samples,1+n_samples)]
175 177 plt.xticks(pos, stat_dict.keys(), rotation='vertical')
176 fig.subplots_adjust(bottom=0.25,hspace=0.5) 178 if(plot=="reads"):
177
178 r,g,b,a=cm.hsv(1.*(1-(i/n_samples)))
179 color_map=rgb_color_variants(r,g,b)
180
181 for sub in [0,1]:
182 plt.subplot(1,2,sub+1)
183 bottom={}
184 for level,values in dict[sample][sub].iteritems():
185 colors=[color_map[r] for r in dict_label[sample][level]]
186
187 if level==0:
188
189 plt.bar(position[sample][level],values,bottom=0, width=0.8 ,color=colors)
190 else:
191 lastvalues=[]
192 for oldpos in range(len(values)):
193 lastvalues.append(bottom[position[sample][level][oldpos]])
194
195 plt.bar(position[sample][level],values, width=0.8,bottom=lastvalues ,color=colors)
196 for pos in range(len(values)):
197 if position[sample][level][pos] not in bottom:
198 bottom[position[sample][level][pos]]=values[pos]
199 else:
200 bottom[position[sample][level][pos]]+=values[pos]
201 pos=[x+0.4 for x in range(len(stat_dict[sample]))]
202 plt.xticks(pos, stat_dict[sample].keys(), rotation='vertical')
203 if(sub==0):
204 plt.ylabel("Cumulative reads assigned to family") 179 plt.ylabel("Cumulative reads assigned to family")
205 else: 180 if(plot=="bp"):
206 plt.ylabel("Cumulative basepairs assigned to family") 181 plt.ylabel("Cumulative basepairs assigned to family")
207 plt.xlabel("") 182 plt.xlabel("")
208 fig.subplots_adjust(bottom=0.25,wspace=0.5) 183 fig.subplots_adjust(bottom=0.25,wspace=0.5,top=0.8)
209 remove_border() 184 remove_border()
210 185
211 customeLegend(color_map) 186 customeLegend(color_map)
212 plt.suptitle(sample)
213 pp.savefig(fig) 187 pp.savefig(fig)
214 i+=1
215 pp.close() 188 pp.close()
216 189
217 190
218 191
219 def parseStatFile(filename): 192 def parseStatFile(filename, directory):
220 193
221 with open(filename,'r') as file: 194 with open(filename,'r') as file:
222 values=[ map(str,line.split('\t')) for line in file ] 195 values=[ map(str,line.split('\t')) for line in file ]
223 196
197 family_dict={}
198 for line in values[1:]:
199 fam_type,fam,region,sample,state,reads,length,bp,coverage=line
200 if fam not in family_dict:
201 family= Family(fam,fam_type,directory=directory)
202 family_dict[fam]=family
203 family_dict[fam].add(sample,region,state,reads,bp,length)
204
205 return family_dict
206
207 class Family:
208
209 def __init__(self,name,typ,directory=None,samples=None):
210 self.name=name
211 if samples!=None:
212 self.samples=samples
213 else:
214 self.samples={}
215 self.directory=directory
216 self.typ=typ
217 if self.directory:
218 self.files=self.__getFiles(directory)
219 else:
220 self.files=None
221
222 def add(self,sample,region,state,reads,bp,length):
223 if sample not in self.samples:
224 s = Sample(sample)
225 self.samples[sample]=s
226
227 return self.samples[sample].add(region,state,reads,bp,self.files[region],length)
228
229
230 def __getFiles(self,directory):
231 dlist = os.listdir(os.path.join(directory,self.typ+"_"+self.name))
224 dict={} 232 dict={}
225 for line in values[1:]: 233 for file in dlist:
226 if line[3] not in dict: 234 try:
227 dict[line[3]]={} 235 wc,region,suf = file.split("_")
228 dict[line[3]][line[1]]={} 236 except ValueError:
229 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] 237 continue
230 else: 238 if region not in dict:
231 if line[1] not in dict[line[3]]: 239 dict[region]={}
232 dict[line[3]][line[1]]={} 240 dict[region][suf]=file #bit redundant?! just save TRUE or FALSE?
233 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]]
234 else:
235 if line[2] not in dict[line[3]][line[1]]:
236 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]]
237 else:
238 dict[line[3]][line[1]][line[2]].append([line[4],line[5],line[7],line[0],line[6]])
239 # for key in dict.iterkeys():
240 # for family_key,values in dict[key].iteritems():
241 # for line in values:
242 # print key,family_key,line
243 return dict 241 return dict
244 def getFiles(directory): 242
245 rval={} 243 def __str__(self):
246 dlist = os.listdir(directory) 244 return "{'type':'%s', 'samples':%s}"%(self.typ,str(self.samples))
247 for dire in dlist: 245
248 if(os.path.isdir(os.path.join(directory,dire))): 246 def __repr__(self):
249 rval[dire]={} 247 return "{'type':'%s', 'samples':%s}"%(self.typ,str(self.samples))
250 flist = os.listdir(os.path.join(directory,dire)) 248
251 for file in flist: 249 class Sample:
252 split=file.split("_") 250
253 region=split[1] 251 def __init__(self,name,regions=None):
254 if region not in rval[dire]: 252 self.name=name
255 rval[dire][region]=[file] 253 if regions!=None:
256 else: 254 self.regions=regions
257 rval[dire][region].append(file) 255 else:
258 256 self.regions={}
259 return rval 257 def add(self,region,state,reads,bp,files,length):
260 258 if region not in self.regions:
261 259 r = Region(region,state,reads,bp,files,length)
260 self.regions[region]=r
261 return True
262 else:
263 return False
264
265 def __str__(self):
266 return str(self.regions)
267
268 def __repr__(self):
269 return str(self.regions)
270
271
272 class Region:
273
274 def __init__(self,id,state,reads,bp,files,length):
275 self.id=id
276 self.state=state
277 self.reads=reads
278 self.bp=bp
279 self.files=files
280 self.length=length
281
282 def __str__(self):
283 return "{'state':'%s', 'reads':%s, 'bp':%s, 'files':%s, 'length':%s}" %(self.state,self.reads,self.bp,self.files,self.length)
284
285 def __repr__(self):
286 return "{'state':'%s', 'reads':%s, 'bp':%s, 'files':%s, 'length':%s}" %(self.state,self.reads,self.bp,self.files,self.length)
287
262 288
263 if __name__ == "__main__": 289 if __name__ == "__main__":
264 290
265 291
266 parser = ArgumentParser() 292 parser = ArgumentParser()
272 298
273 (options,args)= parser.parse_known_args() 299 (options,args)= parser.parse_known_args()
274 300
275 args.insert(0,"dummy") 301 args.insert(0,"dummy")
276 try: 302 try:
277 RegionRunner.run(argv=args) 303 sys.exit(1)#RegionRunner.run(argv=args)
278 except SystemExit: 304 except SystemExit:
279 stat_dict=parseStatFile(options.stat_file) 305 stat_dict=parseStatFile(options.stat_file,options.directory)
306
280 generatePyPlot(stat_dict,os.path.join(options.directory,"plot.pdf")) 307 generatePyPlot(stat_dict,os.path.join(options.directory,"plot.pdf"))
281 generateHTML(stat_dict,options.html_file,options.directory) 308 generateHTML(stat_dict,options.html_file,options.directory)
282 309
283 310
284 311