| 22 | 1 import matplotlib | 
|  | 2 matplotlib.use('Agg') | 
| 0 | 3 import matplotlib.pyplot as plt | 
|  | 4 | 
|  | 5 from matplotlib import rcParams | 
|  | 6 from matplotlib import colors | 
|  | 7 from matplotlib import cm | 
|  | 8 | 
| 33 | 9 from matplotlib.patches import Rectangle | 
|  | 10 | 
| 19 | 11 from vhom import RegionRunner | 
|  | 12 | 
| 0 | 13 from argparse import ArgumentParser | 
|  | 14 from matplotlib.backends.backend_pdf import PdfPages | 
|  | 15 | 
|  | 16 import os | 
| 31 | 17 import shutil | 
| 0 | 18 | 
| 20 | 19 import sys | 
|  | 20 | 
| 0 | 21 import colorsys | 
|  | 22 | 
|  | 23 | 
|  | 24 | 
|  | 25 | 
|  | 26 rcParams['figure.figsize'] = (6, 6) | 
|  | 27 rcParams['figure.dpi'] = 100 | 
|  | 28 rcParams['axes.facecolor'] = 'white' | 
|  | 29 rcParams['font.size'] = 10 | 
|  | 30 rcParams['patch.edgecolor'] = 'white' | 
|  | 31 rcParams['patch.linewidth']=0.5 | 
|  | 32 rcParams['patch.facecolor'] = 'black' | 
|  | 33 rcParams['font.family'] = 'StixGeneral' | 
|  | 34 | 
|  | 35 | 
|  | 36 def color_variants(r,g,b): | 
|  | 37     h,l2,s= colorsys.rgb_to_hls(r,g,b) | 
|  | 38     l=[l2-0.4,l2-0.1,l2+0.2] | 
|  | 39     hexs=[] | 
|  | 40     for i in l: | 
|  | 41         hexs.append(colors.rgb2hex(colorsys.hls_to_rgb(h,i,s))) | 
|  | 42     return hexs | 
|  | 43 | 
|  | 44 def rgb_color_variants(r,g,b): | 
|  | 45     h,l2,s= colorsys.rgb_to_hls(r,g,b) | 
|  | 46     l=[l2-0.4,l2-0.1,l2+0.2] | 
|  | 47     rgbs={} | 
|  | 48     rgbs["pathogen"]=colorsys.hls_to_rgb(h,l[0],s) | 
|  | 49     rgbs["ambiguous"]=colorsys.hls_to_rgb(h,l[1],s) | 
|  | 50     rgbs["human"]=colorsys.hls_to_rgb(h,l[2],s) | 
|  | 51     return rgbs | 
|  | 52 | 
|  | 53 | 
|  | 54 | 
|  | 55 | 
|  | 56 def remove_border(axes=None, top=False, right=False, left=True, bottom=True): | 
|  | 57     """ | 
|  | 58     Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks | 
|  | 59 | 
|  | 60     The top/right/left/bottom keywords toggle whether the corresponding plot border is drawn | 
|  | 61     """ | 
|  | 62     ax = axes or plt.gca() | 
|  | 63     ax.spines['top'].set_visible(top) | 
|  | 64     ax.spines['right'].set_visible(right) | 
|  | 65     ax.spines['left'].set_visible(left) | 
|  | 66     ax.spines['bottom'].set_visible(bottom) | 
|  | 67 | 
|  | 68     #turn off all ticks | 
|  | 69     ax.yaxis.set_ticks_position('none') | 
|  | 70     ax.xaxis.set_ticks_position('none') | 
|  | 71 | 
|  | 72     #now re-enable visibles | 
|  | 73     if top: | 
|  | 74         ax.xaxis.tick_top() | 
|  | 75     if bottom: | 
|  | 76         ax.xaxis.tick_bottom() | 
|  | 77     if left: | 
|  | 78         ax.yaxis.tick_left() | 
|  | 79     if right: | 
|  | 80         ax.yaxis.tick_right() | 
|  | 81 | 
|  | 82 | 
|  | 83 def generateHTML(stat_dict,file,directory): | 
|  | 84     rval=["<html><head><title>Homologous groups and regions derived from hit files </title></head><body><p/>\n"] | 
|  | 85     rval.append("<a href='plot.pdf'>Download Plot</a>") | 
|  | 86     n_samples=0 | 
|  | 87     for sample in stat_dict.iterkeys(): | 
|  | 88         n_samples+=1 | 
|  | 89         rval.append("<div id=%s_graph></div><p/>"%(sample)) | 
| 25 | 90     curr_dir = os.path.dirname(os.path.realpath(__file__)) | 
| 31 | 91     shutil.copy(os.path.join(curr_dir,"jquery-1.10.2.min.js"),directory) | 
|  | 92     shutil.copy(os.path.join(curr_dir,"jqBarGraph.2.1.js"),directory) | 
|  | 93     rval.append("<script type='text/javascript' src='jquery-1.10.2.min.js'></script>") | 
|  | 94     rval.append("<script type='text/javascript' src='jqBarGraph.2.1.js'></script>") | 
| 0 | 95     rval.append('<script>') | 
|  | 96     i=0 | 
|  | 97     for sample in stat_dict.iterkeys(): | 
|  | 98         i+=1 | 
|  | 99         rval.append("%s_array = new Array(" %sample) | 
|  | 100         for family in stat_dict[sample].iterkeys(): | 
|  | 101             values=[] | 
|  | 102             for region,data in stat_dict[sample][family].iteritems(): | 
|  | 103                   values.append([int(data[0][1]),int(data[0][2]), data[0][0], data[0][3],int(data[0][4]),int(region)]) | 
|  | 104             rval.append(str([values,family])+",") | 
|  | 105         rval[-1]=rval[-1][:-1] | 
|  | 106         rval.append(");") | 
|  | 107         rval.append("%s_files=%s;"%(sample,str(getFiles(directory)))) | 
|  | 108         rval.append("$('#%s_graph').jqBarGraph({"%sample) | 
|  | 109         rval.append("sample: '%s',"%sample) | 
|  | 110         rval.append("data: %s_array,"%sample) | 
|  | 111         rval.append("files: %s_files,"%sample) | 
|  | 112         r,g,b,a=cm.hsv(1.*i/n_samples) | 
|  | 113         rval.append("colors: %s," %color_variants(r,g,b)) | 
|  | 114         rval.append("legend: true,") | 
|  | 115         rval.append("tab: 'reads',") | 
|  | 116         rval.append("title: '<H3>Visualisation of Sample %s</H3>',"%sample) | 
|  | 117         rval.append("width: %d"%((len(stat_dict[sample])*50)+150)) | 
|  | 118         rval.append("});") | 
|  | 119 #        rval.append("jQuery.get('test.py', function(data) {") | 
|  | 120 #        rval.append("alert(data)") | 
|  | 121 #        rval.append("})") | 
|  | 122     rval.append("</script>") | 
|  | 123 | 
|  | 124     rval.append( '</body></html>' ) | 
|  | 125     with open(file,'w') as file: | 
|  | 126         file.write("\n".join( rval )) | 
| 33 | 127 def transformDict(stat_dict): | 
| 0 | 128     dict={} | 
|  | 129     position={} | 
|  | 130     dict_label={} | 
|  | 131     for sample in stat_dict.iterkeys(): | 
|  | 132         dict[sample]=[{},{}] | 
|  | 133         position[sample]={} | 
|  | 134         dict_label[sample]={} | 
|  | 135         j=0 | 
|  | 136         for family in stat_dict[sample].iterkeys(): | 
|  | 137 | 
|  | 138             i=0 | 
|  | 139             for region,values in stat_dict[sample][family].iteritems(): | 
|  | 140 | 
|  | 141                 if i not in dict[sample][0]: | 
|  | 142                     dict[sample][0][i]=[] | 
|  | 143                     dict[sample][1][i]=[] | 
|  | 144                     position[sample][i]=[] | 
|  | 145                     dict_label[sample][i]=[] | 
|  | 146                 dict[sample][0][i].append(int(values[0][1])) | 
|  | 147                 dict[sample][1][i].append(int(values[0][2])) | 
|  | 148                 position[sample][i].append(j) | 
|  | 149                 dict_label[sample][i].append(values[0][0]) | 
|  | 150                 i+=1 | 
|  | 151             j+=1 | 
| 33 | 152     return dict,position,dict_label | 
|  | 153 | 
|  | 154 def customeLegend(color_map): | 
|  | 155     legend_map=[[],[]] | 
|  | 156     for label  in color_map: | 
|  | 157         box=Rectangle((0,0),1,1,color=color_map[label],fill=True) | 
|  | 158         legend_map[0].append(box) | 
|  | 159         legend_map[1].append(label) | 
|  | 160     plt.figlegend(legend_map[0],legend_map[1],loc=8,ncol=len(color_map)) | 
|  | 161 | 
|  | 162 | 
|  | 163 | 
|  | 164 | 
|  | 165 def generatePyPlot(stat_dict,output_file): | 
|  | 166     pp = PdfPages(output_file) | 
|  | 167     fig = plt.figure() | 
|  | 168 | 
|  | 169     n_samples=len(stat_dict) | 
|  | 170     dict,position,dict_label=transformDict(stat_dict) | 
| 0 | 171 | 
|  | 172     i=0 | 
|  | 173     for sample in dict.iterkeys(): | 
|  | 174         fig = plt.figure() | 
| 33 | 175 | 
| 0 | 176         fig.subplots_adjust(bottom=0.25,hspace=0.5) | 
| 33 | 177 | 
| 0 | 178         r,g,b,a=cm.hsv(1.*i/n_samples) | 
|  | 179         color_map=rgb_color_variants(r,g,b) | 
| 33 | 180 | 
| 0 | 181         for sub in [0,1]: | 
|  | 182           plt.subplot(1,2,sub+1) | 
|  | 183           bottom={} | 
|  | 184           for level,values in dict[sample][sub].iteritems(): | 
|  | 185               colors=[color_map[r] for r in dict_label[sample][level]] | 
| 33 | 186 | 
| 0 | 187               if level==0: | 
| 25 | 188 | 
| 0 | 189                   plt.bar(position[sample][level],values,bottom=0, width=0.8 ,color=colors) | 
|  | 190               else: | 
|  | 191                   lastvalues=[] | 
|  | 192                   for oldpos in range(len(values)): | 
|  | 193                       lastvalues.append(bottom[position[sample][level][oldpos]]) | 
| 25 | 194 | 
| 0 | 195                   plt.bar(position[sample][level],values, width=0.8,bottom=lastvalues ,color=colors) | 
|  | 196               for pos in range(len(values)): | 
|  | 197                   if position[sample][level][pos] not in bottom: | 
|  | 198                         bottom[position[sample][level][pos]]=0 | 
|  | 199                   else: | 
|  | 200                         bottom[position[sample][level][pos]]+=values[pos] | 
|  | 201           pos=[x+0.4 for x in range(len(stat_dict[sample]))] | 
|  | 202           plt.xticks(pos, stat_dict[sample].keys(), rotation='vertical') | 
|  | 203           if(sub==0): | 
| 33 | 204             plt.ylabel("Cumulative reads assigned to family") | 
| 0 | 205           else: | 
| 33 | 206             plt.ylabel("Cumulative basepairs assigned to family") | 
| 0 | 207           plt.xlabel("") | 
|  | 208           fig.subplots_adjust(bottom=0.25,wspace=0.5) | 
|  | 209           remove_border() | 
| 33 | 210 | 
|  | 211         customeLegend(color_map) | 
| 0 | 212         plt.suptitle(sample) | 
|  | 213         pp.savefig(fig) | 
|  | 214         i+=1 | 
|  | 215     pp.close() | 
|  | 216 | 
|  | 217 | 
|  | 218 | 
|  | 219 def parseStatFile(filename): | 
|  | 220 | 
|  | 221     with open(filename,'r') as file: | 
|  | 222         values=[ map(str,line.split('\t')) for line in file ] | 
|  | 223 | 
|  | 224         dict={} | 
|  | 225         for line in values[1:]: | 
|  | 226             if line[3] not in dict: | 
|  | 227                 dict[line[3]]={} | 
|  | 228                 dict[line[3]][line[1]]={} | 
|  | 229                 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | 
|  | 230             else: | 
|  | 231                 if line[1] not in dict[line[3]]: | 
|  | 232                     dict[line[3]][line[1]]={} | 
|  | 233                     dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | 
|  | 234                 else: | 
|  | 235                     if line[2] not in dict[line[3]][line[1]]: | 
|  | 236                         dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | 
|  | 237                     else: | 
|  | 238                          dict[line[3]][line[1]][line[2]].append([line[4],line[5],line[7],line[0],line[6]]) | 
|  | 239 #        for key in dict.iterkeys(): | 
|  | 240 #            for family_key,values in dict[key].iteritems(): | 
|  | 241 #                for line in values: | 
|  | 242 #                    print key,family_key,line | 
|  | 243         return dict | 
|  | 244 def getFiles(directory): | 
|  | 245     rval={} | 
|  | 246     dlist = os.listdir(directory) | 
|  | 247     for dire in dlist: | 
|  | 248         if(os.path.isdir(os.path.join(directory,dire))): | 
|  | 249              rval[dire]={} | 
|  | 250              flist = os.listdir(os.path.join(directory,dire)) | 
|  | 251              for file in flist: | 
|  | 252                 split=file.split("_") | 
|  | 253                 region=split[1] | 
|  | 254                 if region not in rval[dire]: | 
|  | 255                     rval[dire][region]=[file] | 
|  | 256                 else: | 
|  | 257                     rval[dire][region].append(file) | 
|  | 258 | 
|  | 259     return rval | 
|  | 260 | 
|  | 261 | 
|  | 262 | 
|  | 263 if __name__ == "__main__": | 
|  | 264 | 
|  | 265 | 
|  | 266     parser = ArgumentParser() | 
|  | 267 | 
|  | 268     a = parser.add_argument | 
|  | 269     a("--html_file",dest="html_file") | 
|  | 270     a("--directory",dest="directory") | 
|  | 271     a("--stats",dest="stat_file") | 
|  | 272 | 
|  | 273     (options,args)= parser.parse_known_args() | 
|  | 274 | 
| 18 | 275     args.insert(0,"dummy") | 
|  | 276     try: | 
|  | 277         RegionRunner.run(argv=args) | 
| 21 | 278     except SystemExit: | 
|  | 279         stat_dict=parseStatFile(options.stat_file) | 
|  | 280         generatePyPlot(stat_dict,os.path.join(options.directory,"plot.pdf")) | 
|  | 281         generateHTML(stat_dict,options.html_file,options.directory) | 
| 0 | 282 | 
|  | 283 | 
|  | 284 | 
|  | 285 | 
|  | 286 |