| 0 | 1 import matplotlib.pyplot as plt | 
|  | 2 | 
|  | 3 from matplotlib import rcParams | 
|  | 4 from matplotlib import colors | 
|  | 5 from matplotlib import cm | 
|  | 6 | 
|  | 7 from argparse import ArgumentParser | 
|  | 8 from matplotlib.backends.backend_pdf import PdfPages | 
|  | 9 | 
|  | 10 import os | 
|  | 11 | 
|  | 12 import colorsys | 
|  | 13 | 
|  | 14 | 
|  | 15 | 
|  | 16 | 
|  | 17 rcParams['figure.figsize'] = (6, 6) | 
|  | 18 rcParams['figure.dpi'] = 100 | 
|  | 19 rcParams['axes.facecolor'] = 'white' | 
|  | 20 rcParams['font.size'] = 10 | 
|  | 21 rcParams['patch.edgecolor'] = 'white' | 
|  | 22 rcParams['patch.linewidth']=0.5 | 
|  | 23 rcParams['patch.facecolor'] = 'black' | 
|  | 24 rcParams['font.family'] = 'StixGeneral' | 
|  | 25 | 
|  | 26 | 
|  | 27 def color_variants(r,g,b): | 
|  | 28     h,l2,s= colorsys.rgb_to_hls(r,g,b) | 
|  | 29     l=[l2-0.4,l2-0.1,l2+0.2] | 
|  | 30     hexs=[] | 
|  | 31     for i in l: | 
|  | 32         hexs.append(colors.rgb2hex(colorsys.hls_to_rgb(h,i,s))) | 
|  | 33     return hexs | 
|  | 34 | 
|  | 35 def rgb_color_variants(r,g,b): | 
|  | 36     h,l2,s= colorsys.rgb_to_hls(r,g,b) | 
|  | 37     l=[l2-0.4,l2-0.1,l2+0.2] | 
|  | 38     rgbs={} | 
|  | 39     rgbs["pathogen"]=colorsys.hls_to_rgb(h,l[0],s) | 
|  | 40     rgbs["ambiguous"]=colorsys.hls_to_rgb(h,l[1],s) | 
|  | 41     rgbs["human"]=colorsys.hls_to_rgb(h,l[2],s) | 
|  | 42     return rgbs | 
|  | 43 | 
|  | 44 | 
|  | 45 | 
|  | 46 | 
|  | 47 def remove_border(axes=None, top=False, right=False, left=True, bottom=True): | 
|  | 48     """ | 
|  | 49     Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks | 
|  | 50 | 
|  | 51     The top/right/left/bottom keywords toggle whether the corresponding plot border is drawn | 
|  | 52     """ | 
|  | 53     ax = axes or plt.gca() | 
|  | 54     ax.spines['top'].set_visible(top) | 
|  | 55     ax.spines['right'].set_visible(right) | 
|  | 56     ax.spines['left'].set_visible(left) | 
|  | 57     ax.spines['bottom'].set_visible(bottom) | 
|  | 58 | 
|  | 59     #turn off all ticks | 
|  | 60     ax.yaxis.set_ticks_position('none') | 
|  | 61     ax.xaxis.set_ticks_position('none') | 
|  | 62 | 
|  | 63     #now re-enable visibles | 
|  | 64     if top: | 
|  | 65         ax.xaxis.tick_top() | 
|  | 66     if bottom: | 
|  | 67         ax.xaxis.tick_bottom() | 
|  | 68     if left: | 
|  | 69         ax.yaxis.tick_left() | 
|  | 70     if right: | 
|  | 71         ax.yaxis.tick_right() | 
|  | 72 | 
|  | 73 | 
|  | 74 def generateHTML(stat_dict,file,directory): | 
|  | 75     rval=["<html><head><title>Homologous groups and regions derived from hit files </title></head><body><p/>\n"] | 
|  | 76     rval.append("<a href='plot.pdf'>Download Plot</a>") | 
|  | 77     n_samples=0 | 
|  | 78     for sample in stat_dict.iterkeys(): | 
|  | 79         n_samples+=1 | 
|  | 80         rval.append("<div id=%s_graph></div><p/>"%(sample)) | 
|  | 81     rval.append("<script src='jquery-1.10.2.min.js'></script>") | 
|  | 82     rval.append("<script src='jqBarGraph.2.1.js'></script>") | 
|  | 83     rval.append('<script>') | 
|  | 84     i=0 | 
|  | 85     for sample in stat_dict.iterkeys(): | 
|  | 86         i+=1 | 
|  | 87         rval.append("%s_array = new Array(" %sample) | 
|  | 88         for family in stat_dict[sample].iterkeys(): | 
|  | 89             values=[] | 
|  | 90             for region,data in stat_dict[sample][family].iteritems(): | 
|  | 91                   values.append([int(data[0][1]),int(data[0][2]), data[0][0], data[0][3],int(data[0][4]),int(region)]) | 
|  | 92             rval.append(str([values,family])+",") | 
|  | 93         rval[-1]=rval[-1][:-1] | 
|  | 94         rval.append(");") | 
|  | 95         rval.append("%s_files=%s;"%(sample,str(getFiles(directory)))) | 
|  | 96         rval.append("$('#%s_graph').jqBarGraph({"%sample) | 
|  | 97         rval.append("sample: '%s',"%sample) | 
|  | 98         rval.append("data: %s_array,"%sample) | 
|  | 99         rval.append("files: %s_files,"%sample) | 
|  | 100         r,g,b,a=cm.hsv(1.*i/n_samples) | 
|  | 101         rval.append("colors: %s," %color_variants(r,g,b)) | 
|  | 102         rval.append("legend: true,") | 
|  | 103         rval.append("tab: 'reads',") | 
|  | 104         rval.append("title: '<H3>Visualisation of Sample %s</H3>',"%sample) | 
|  | 105         rval.append("width: %d"%((len(stat_dict[sample])*50)+150)) | 
|  | 106         rval.append("});") | 
|  | 107 #        rval.append("jQuery.get('test.py', function(data) {") | 
|  | 108 #        rval.append("alert(data)") | 
|  | 109 #        rval.append("})") | 
|  | 110     rval.append("</script>") | 
|  | 111 | 
|  | 112     rval.append( '</body></html>' ) | 
|  | 113     with open(file,'w') as file: | 
|  | 114         file.write("\n".join( rval )) | 
|  | 115 | 
|  | 116 def generatePyPlot(stat_dict,output_file): | 
|  | 117     pp = PdfPages(output_file) | 
|  | 118     fig = plt.figure() | 
|  | 119 | 
|  | 120     n_samples=len(stat_dict) | 
|  | 121     dict={} | 
|  | 122     position={} | 
|  | 123     dict_label={} | 
|  | 124     for sample in stat_dict.iterkeys(): | 
|  | 125         dict[sample]=[{},{}] | 
|  | 126         position[sample]={} | 
|  | 127         dict_label[sample]={} | 
|  | 128         j=0 | 
|  | 129         for family in stat_dict[sample].iterkeys(): | 
|  | 130 | 
|  | 131             i=0 | 
|  | 132             for region,values in stat_dict[sample][family].iteritems(): | 
|  | 133 | 
|  | 134                 if i not in dict[sample][0]: | 
|  | 135                     dict[sample][0][i]=[] | 
|  | 136                     dict[sample][1][i]=[] | 
|  | 137                     position[sample][i]=[] | 
|  | 138                     dict_label[sample][i]=[] | 
|  | 139                 dict[sample][0][i].append(int(values[0][1])) | 
|  | 140                 dict[sample][1][i].append(int(values[0][2])) | 
|  | 141                 position[sample][i].append(j) | 
|  | 142                 dict_label[sample][i].append(values[0][0]) | 
|  | 143                 i+=1 | 
|  | 144             j+=1 | 
|  | 145 | 
|  | 146     i=0 | 
|  | 147     for sample in dict.iterkeys(): | 
|  | 148         fig = plt.figure() | 
|  | 149         fig.subplots_adjust(bottom=0.25,hspace=0.5) | 
|  | 150         r,g,b,a=cm.hsv(1.*i/n_samples) | 
|  | 151         color_map=rgb_color_variants(r,g,b) | 
|  | 152         for sub in [0,1]: | 
|  | 153           plt.subplot(1,2,sub+1) | 
|  | 154           bottom={} | 
|  | 155           for level,values in dict[sample][sub].iteritems(): | 
|  | 156               colors=[color_map[r] for r in dict_label[sample][level]] | 
|  | 157               if level==0: | 
|  | 158                   print level, values, position[sample][level],bottom | 
|  | 159                   plt.bar(position[sample][level],values,bottom=0, width=0.8 ,color=colors) | 
|  | 160               else: | 
|  | 161                   lastvalues=[] | 
|  | 162                   for oldpos in range(len(values)): | 
|  | 163                       lastvalues.append(bottom[position[sample][level][oldpos]]) | 
|  | 164                   print level, values, position[sample][level], lastvalues | 
|  | 165                   plt.bar(position[sample][level],values, width=0.8,bottom=lastvalues ,color=colors) | 
|  | 166               for pos in range(len(values)): | 
|  | 167                   if position[sample][level][pos] not in bottom: | 
|  | 168                         bottom[position[sample][level][pos]]=0 | 
|  | 169                   else: | 
|  | 170                         bottom[position[sample][level][pos]]+=values[pos] | 
|  | 171           pos=[x+0.4 for x in range(len(stat_dict[sample]))] | 
|  | 172           plt.xticks(pos, stat_dict[sample].keys(), rotation='vertical') | 
|  | 173           if(sub==0): | 
|  | 174             plt.ylabel("Reads") | 
|  | 175           else: | 
|  | 176             plt.ylabel("Basepairs") | 
|  | 177           plt.xlabel("") | 
|  | 178           fig.subplots_adjust(bottom=0.25,wspace=0.5) | 
|  | 179           remove_border() | 
|  | 180         plt.suptitle(sample) | 
|  | 181         pp.savefig(fig) | 
|  | 182         i+=1 | 
|  | 183     pp.close() | 
|  | 184 | 
|  | 185 | 
|  | 186 | 
|  | 187 def parseStatFile(filename): | 
|  | 188 | 
|  | 189     with open(filename,'r') as file: | 
|  | 190         values=[ map(str,line.split('\t')) for line in file ] | 
|  | 191 | 
|  | 192         dict={} | 
|  | 193         for line in values[1:]: | 
|  | 194             if line[3] not in dict: | 
|  | 195                 dict[line[3]]={} | 
|  | 196                 dict[line[3]][line[1]]={} | 
|  | 197                 dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | 
|  | 198             else: | 
|  | 199                 if line[1] not in dict[line[3]]: | 
|  | 200                     dict[line[3]][line[1]]={} | 
|  | 201                     dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | 
|  | 202                 else: | 
|  | 203                     if line[2] not in dict[line[3]][line[1]]: | 
|  | 204                         dict[line[3]][line[1]][line[2]]=[[line[4],line[5],line[7],line[0],line[6]]] | 
|  | 205                     else: | 
|  | 206                          dict[line[3]][line[1]][line[2]].append([line[4],line[5],line[7],line[0],line[6]]) | 
|  | 207 #        for key in dict.iterkeys(): | 
|  | 208 #            for family_key,values in dict[key].iteritems(): | 
|  | 209 #                for line in values: | 
|  | 210 #                    print key,family_key,line | 
|  | 211         return dict | 
|  | 212 def getFiles(directory): | 
|  | 213     rval={} | 
|  | 214     dlist = os.listdir(directory) | 
|  | 215     for dire in dlist: | 
|  | 216         if(os.path.isdir(os.path.join(directory,dire))): | 
|  | 217              rval[dire]={} | 
|  | 218              flist = os.listdir(os.path.join(directory,dire)) | 
|  | 219              for file in flist: | 
|  | 220                 split=file.split("_") | 
|  | 221                 region=split[1] | 
|  | 222                 if region not in rval[dire]: | 
|  | 223                     rval[dire][region]=[file] | 
|  | 224                 else: | 
|  | 225                     rval[dire][region].append(file) | 
|  | 226 | 
|  | 227     return rval | 
|  | 228 | 
|  | 229 | 
|  | 230 | 
|  | 231 if __name__ == "__main__": | 
|  | 232 | 
|  | 233 | 
|  | 234     parser = ArgumentParser() | 
|  | 235 | 
|  | 236     a = parser.add_argument | 
|  | 237     a("--html_file",dest="html_file") | 
|  | 238     a("--directory",dest="directory") | 
|  | 239     a("--stats",dest="stat_file") | 
|  | 240 | 
|  | 241     (options,args)= parser.parse_known_args() | 
|  | 242 | 
|  | 243 #    args.insert(0,"dummy") | 
|  | 244 #    try: | 
|  | 245 #        RegionRunner.run(argv=args) | 
|  | 246 #    except SystemExit: | 
|  | 247 | 
|  | 248     stat_dict=parseStatFile(options.stat_file) | 
|  | 249     generatePyPlot(stat_dict,os.path.join(options.directory,"plot.pdf")) | 
|  | 250     generateHTML(stat_dict,options.html_file,options.directory) | 
|  | 251 | 
|  | 252 | 
|  | 253 | 
|  | 254 | 
|  | 255 |