annotate plot_features.py @ 20:a6284ef17bf3 draft default tip

Fixed bug due to numerical approximation after normalization affecting root-level clades (e.g. "Bacteria" or "Archaea")
author george-weingart
date Tue, 07 Jul 2015 13:26:55 -0400
parents 47ac77f2fe68
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
2
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
3 import os,sys,matplotlib,zipfile,argparse,string
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
4 matplotlib.use('Agg')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
5 from pylab import *
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
6 from lefse import *
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
7 import random as rand
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
8
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
9 colors = ['r','g','b','m','c']
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
10
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
11 def read_params(args):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
12 parser = argparse.ArgumentParser(description='Cladoplot')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
13 parser.add_argument('input_file_1', metavar='INPUT_FILE', type=str, help="dataset files")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
14 parser.add_argument('input_file_2', metavar='INPUT_FILE', type=str, help="LEfSe output file")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
15 parser.add_argument('output_file', metavar='OUTPUT_FILE', type=str, help="the file for the output (the zip file if an archive is required, the output directory otherwise)")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
16 parser.add_argument('--width',dest="width", type=float, default=10.0 )
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
17 parser.add_argument('--height',dest="height", type=float, default=4.0)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
18 parser.add_argument('--top',dest="top", type=float, default=-1.0, help="set maximum y limit (-1.0 means automatic limit)")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
19 parser.add_argument('--bot',dest="bot", type=float, default=0.0, help="set minimum y limit (default 0.0, -1.0 means automatic limit)")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
20 parser.add_argument('--title_font_size',dest="title_font_size", type=str, default="14")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
21 parser.add_argument('--class_font_size',dest="class_font_size", type=str, default="14")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
22 parser.add_argument('--class_label_pos',dest="class_label_pos", type=str, choices=["up","down"], default="up")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
23 parser.add_argument('--subcl_mean',dest="subcl_mean", type=str, choices=["y","n"], default="y")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
24 parser.add_argument('--subcl_median',dest="subcl_median", type=str, choices=["y","n"], default="y")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
25 parser.add_argument('--font_size',dest="font_size", type=str, default="10")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
26 parser.add_argument('-n',dest="unused", metavar="flt", type=float, default=-1.0,help="unused")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
27 parser.add_argument('--format', dest="format", default="png", choices=["png","pdf","svg"], type=str, help="the format for the output file")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
28 parser.add_argument('-f', dest="f", default="diff", choices=["all","diff","one"], type=str, help="wheter to plot all features (all), only those differentially abundant according to LEfSe or only one (the one given with --feature_name) ")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
29 parser.add_argument('--feature_name', dest="feature_name", default="", type=str, help="The name of the feature to plot (levels separated by .) ")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
30 parser.add_argument('--feature_num', dest="feature_num", default="-1", type=int, help="The number of the feature to plot ")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
31 parser.add_argument('--archive', dest="archive", default="none", choices=["zip","none"], type=str, help="")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
32 parser.add_argument('--background_color',dest="back_color", type=str, choices=["k","w"], default="w", help="set the color of the background")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
33 parser.add_argument('--dpi',dest="dpi", type=int, default=72)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
34
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
35 args = parser.parse_args()
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
36
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
37 return vars(args)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
38
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
39 def read_data(file_data,file_feats,params):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
40 with open(file_feats, 'r') as features:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
41 feats_to_plot = [(f.split()[:-1],len(f.split()) == 5) for f in features.readlines()]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
42 if not feats_to_plot:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
43 print "No features to plot\n"
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
44 sys.exit(0)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
45 feats,cls,class_sl,subclass_sl,class_hierarchy,params['norm_v'] = load_data(file_data, True)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
46 if params['feature_num'] > 0:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
47 params['feature_name'] = [line.split()[0] for line in open(params['input_file_2'])][params['feature_num']-1]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
48 features = {}
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
49 for f in feats_to_plot:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
50 if params['f'] == "diff" and not f[1]: continue
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
51 if params['f'] == "one" and f[0][0] != params['feature_name']: continue
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
52 features[f[0][0]] = {'dim':float(f[0][1]), 'abundances':feats[f[0][0]], 'sig':f[1], 'cls':cls, 'class_sl':class_sl, 'subclass_sl':subclass_sl, 'class_hierarchy':class_hierarchy}
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
53 if not features:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
54 print "No features to plot\n"
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
55 sys.exit(0)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
56 return features
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
57
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
58 def plot(name,k_n,feat,params):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
59 fig = plt.figure(figsize=(params['width'], params['height']),edgecolor=params['fore_color'],facecolor=params['back_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
60 ax = fig.add_subplot(111,axis_bgcolor=params['back_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
61 subplots_adjust(bottom=0.15)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
62
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
63 max_m = 0.0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
64 norm = 1.0 if float(params['norm_v']) < 0.0 else float(params['norm_v'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
65 for v in feat['subclass_sl'].values():
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
66 fr,to = v[0], v[1]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
67 median = numpy.mean(feat['abundances'][fr:to])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
68 if median > max_m: max_m = median
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
69 max_m /= norm
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
70 max_v = max_m*3 if max_m*3 < max(feat['abundances'])*1.1/norm else max(feat['abundances'])/norm
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
71 min_v = max(0.0,min(feat['abundances'])*0.9/norm)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
72
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
73 if params['top'] > 0.0: max_v = params['top']
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
74 if params['bot'] >= 0.0: min_v = params['bot']
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
75
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
76 if max_v == 0.0: max_v = 0.0001
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
77 if max_v == min_v: max_v = min_v*1.1
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
78
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
79 cl_sep = max(int(sum([vv[1]/norm - vv[0]/norm for vv in feat['class_sl'].values()])/150.0),1)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
80 seps = []
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
81 xtics = []
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
82 x2tics = []
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
83 last_fr = 0.0
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
84 for i,cl in enumerate(sorted(feat['class_hierarchy'].keys())):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
85 for j,subcl in enumerate(feat['class_hierarchy'][cl]):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
86 fr = feat['subclass_sl'][subcl][0]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
87 to = feat['subclass_sl'][subcl][1]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
88 val = feat['abundances'][fr:to]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
89 fr += cl_sep*i
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
90 to += cl_sep*i
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
91 pos = arange(fr,to)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
92 max_x = to
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
93 col = colors[j%len(colors)]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
94 vv = [v1/norm for v1 in val]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
95 median = numpy.median(vv)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
96 mean = numpy.mean(vv)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
97 valv = [max(min(v/norm,max_v),min_v) for v in val]
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
98 ax.bar(pos,valv, align='center', color=col, edgecolor=col, linewidth=0.1 )
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
99 if params['subcl_median'] == 'y': ax.plot([fr,to-1],[median,median],"k--",linewidth=1,color=params['fore_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
100 if params['subcl_mean'] == 'y': ax.plot([fr,to-1],[mean,mean],"-",linewidth=1,color=params['fore_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
101 nna = subcl if subcl.count("_") == 0 or not subcl.startswith(cl) else "_".join(subcl.split(cl)[1:])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
102 if nna == "subcl" or nna == "_subcl": nna = " "
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
103 xtics.append(((fr+(to-fr)/2),nna))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
104 seps.append(float(to))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
105 x2tics.append(((last_fr+(to-last_fr)/2),cl))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
106 last_fr = to + float(cl_sep)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
107 for s in seps[:-1]:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
108 ax.plot([s,s],[min_v,max_v],"-",linewidth=5,color=params['fore_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
109 ax.set_title(k_n, size=params['title_font_size'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
110 xticks([x[0] for x in xtics],[x[1] for x in xtics],rotation=-30, ha = 'left', fontsize=params['font_size'], color=params['fore_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
111 yticks(fontsize=params['font_size'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
112
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
113 ylabel('Relative abundance')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
114 ax.set_ylim((min_v,max_v))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
115 a,b = ax.get_xlim()
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
116 ax.set_xlim((0-float(last_fr)/float(b-a),max_x))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
117 ax.yaxis.grid(True)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
118
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
119 def get_col_attr(x):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
120 return hasattr(x, 'set_color') and not hasattr(x, 'set_facecolor')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
121 def get_edgecol_attr(x):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
122 return hasattr(x, 'set_edgecolor')
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
123
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
124
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
125 for o in fig.findobj(get_col_attr):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
126 o.set_color(params['fore_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
127 for o in fig.findobj(get_edgecol_attr):
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
128 if o.get_edgecolor() == params['back_color']:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
129 o.set_edgecolor(params['fore_color'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
130
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
131 for t in x2tics:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
132 m = ax.get_ylim()[1]*0.97 if params['class_label_pos']=='up' else 0.07
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
133 plt.text(t[0],m, "class: "+t[1], ha ="center", size=params['class_font_size'], va="top", bbox = dict(boxstyle="round", ec='k', fc='y'))
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
134
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
135
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
136 plt.savefig(name,format=params['format'],facecolor=params['back_color'],edgecolor=params['fore_color'],dpi=params['dpi'])
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
137 plt.close()
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
138 return name
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
139
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
140 if __name__ == '__main__':
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
141 params = read_params(sys.argv)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
142 params['fore_color'] = 'w' if params['back_color'] == 'k' else 'k'
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
143 features = read_data(params['input_file_1'],params['input_file_2'],params)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
144 if params['archive'] == "zip": file = zipfile.ZipFile(params['output_file'], "w")
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
145 for k,f in features.items():
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
146 print "Exporting ", k
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
147 if params['archive'] == "zip":
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
148 of = plot("/tmp/"+str(int(f['sig']))+"_"+"-".join(k.split("."))+"."+params['format'],k,f,params)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
149 file.write(of, os.path.basename(of), zipfile.ZIP_DEFLATED)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
150 else:
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
151 if params['f'] == 'one': plot(params['output_file'],k,f,params)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
152 else: plot(params['output_file']+str(int(f['sig']))+"_"+"-".join(k.split("."))+"."+params['format'],k,f,params)
47ac77f2fe68 First version of lefse in this toolshed
george-weingart
parents:
diff changeset
153 if params['archive'] == "zip": file.close()