|
11
|
1 ################################################################################
|
|
|
2 #
|
|
|
3 # MRC FGU Computational Genomics Group
|
|
|
4 #
|
|
|
5 # $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $
|
|
|
6 #
|
|
|
7 # Copyright (C) 2009 Andreas Heger
|
|
|
8 #
|
|
|
9 # This program is free software; you can redistribute it and/or
|
|
|
10 # modify it under the terms of the GNU General Public License
|
|
|
11 # as published by the Free Software Foundation; either version 2
|
|
|
12 # of the License, or (at your option) any later version.
|
|
|
13 #
|
|
|
14 # This program is distributed in the hope that it will be useful,
|
|
|
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
17 # GNU General Public License for more details.
|
|
|
18 #
|
|
|
19 # You should have received a copy of the GNU General Public License
|
|
|
20 # along with this program; if not, write to the Free Software
|
|
|
21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
22 #################################################################################
|
|
|
23 '''
|
|
|
24 gat-plot - plot results from a gat analysis
|
|
|
25 ===========================================
|
|
|
26
|
|
|
27 :Author: Andreas Heger
|
|
|
28 :Release: $Id$
|
|
|
29 :Date: |today|
|
|
|
30 :Tags: Python
|
|
|
31
|
|
|
32 Purpose
|
|
|
33 -------
|
|
|
34
|
|
|
35 This script takes the results of a ``gat-run.py`` or ``gat-compare.py``
|
|
|
36 and plots the results.
|
|
|
37
|
|
|
38 This script requires matplotlib.
|
|
|
39
|
|
|
40 Usage
|
|
|
41 -----
|
|
|
42
|
|
|
43 Example::
|
|
|
44
|
|
|
45 python gat-plot.py --results-file=gat.results.tsv.gz
|
|
|
46 python gat-plot.py --input-filename-counts=gat.counts.tsv.gz
|
|
|
47
|
|
|
48 Type::
|
|
|
49
|
|
|
50 python gat-plot.py --help
|
|
|
51
|
|
|
52 for command line help.
|
|
|
53
|
|
|
54 Documentation
|
|
|
55 -------------
|
|
|
56
|
|
|
57 Code
|
|
|
58 ----
|
|
|
59
|
|
|
60 '''
|
|
|
61
|
|
|
62 import os, sys, re, optparse, collections, types, glob, time
|
|
|
63 import numpy
|
|
|
64
|
|
|
65 import gat
|
|
|
66 import gat.Experiment as E
|
|
|
67 import gat.IOTools as IOTools
|
|
|
68 import gat.IO as IO
|
|
|
69
|
|
|
# matplotlib is optional at import time: HASPLOT records whether
# ``matplotlib.pyplot`` could be imported as ``plt``.
try:
    import matplotlib.pyplot as plt
except (ImportError, RuntimeError):
    HASPLOT = False
else:
    HASPLOT = True
|
|
|
75
|
|
|
class DummyAnnotatorResult:
    '''Plain record holding one row of a tab-separated results table.

    A row consists of ten columns: ``track``, ``annotation`` (kept as
    strings) followed by ``observed``, ``expected``, ``lower95``,
    ``upper95``, ``stddev``, ``fold``, ``pvalue`` and ``qvalue``
    (converted to float).

    The ``format_*`` class attributes are the %-format strings used by
    :meth:`__str__` when writing a row back out.
    '''

    format_observed = "%i"
    format_expected = "%6.4f"
    format_fold = "%6.4f"
    format_pvalue = "%6.4e"

    def __init__(self):
        pass

    @classmethod
    def _fromLine(cls, line):
        '''build a result from a single tab-separated *line*.

        A trailing line terminator is optional. Raises ValueError if
        the line does not contain exactly ten fields or if a numeric
        field cannot be converted to float.
        '''
        x = cls()
        # Strip only line terminators.  Blindly dropping the last
        # character would truncate the q-value of a line that does not
        # end in a newline (for example the last line of a file).
        data = line.rstrip("\r\n").split("\t")
        if len(data) != 10:
            raise ValueError(
                "expected 10 tab-separated fields, got %i: %r" %
                (len(data), line))
        x.track, x.annotation = data[:2]
        (x.observed, x.expected, x.lower95, x.upper95,
         x.stddev, x.fold, x.pvalue, x.qvalue) = map(float, data[2:])
        return x

    def __str__(self):
        '''return the result as a tab-separated row (no newline).'''
        return "\t".join((self.track,
                          self.annotation,
                          self.format_observed % self.observed,
                          self.format_expected % self.expected,
                          self.format_expected % self.lower95,
                          self.format_expected % self.upper95,
                          self.format_expected % self.stddev,
                          self.format_fold % self.fold,
                          self.format_pvalue % self.pvalue,
                          self.format_pvalue % self.qvalue))
|
|
|
106
|
|
|
def buildPlotFilename(options, key):
    '''build the output filename for the plot identified by *key*.

    Every ``%s`` in ``options.output_plots_pattern`` is replaced by
    *key*. Any character other than letters, digits, ``-``, ``_``,
    ``.`` and ``/`` is then replaced by ``_``. If the resulting
    filename has a directory part that does not exist yet, the
    directory is created.

    Returns the filename. Raises TypeError if no pattern has been set.
    '''
    pattern = options.output_plots_pattern
    if pattern is None:
        raise TypeError(
            "options.output_plots_pattern is not set - "
            "please supply --output-plots-pattern")

    # Literal substitution: *key* must not be used as a regular
    # expression replacement template, otherwise backslashes in a
    # track name are interpreted as escapes or group references.
    filename = pattern.replace("%s", key)
    filename = re.sub("[^a-zA-Z0-9-_./]", "_", filename)

    dirname = os.path.dirname(filename)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    return filename
|
|
|
113
|
|
|
def plotBarplots(annotator_results, options):
    '''output a series of bar-plots, one per track.

    *annotator_results* is iterated as a mapping of track to a mapping
    of annotation to result; each result needs ``fold`` and ``qvalue``
    attributes. For every track a horizontal bar-plot of the fold
    values is saved to the file given by
    ``buildPlotFilename(options, "bars-<track>")``.

    Significant results are opaque, while non-significant results
    (``qvalue > 0.05``) are transparent. Tracks without any results
    are skipped.
    '''

    for track in annotator_results:
        r = annotator_results[track]
        if not r:
            # nothing to plot - zip(*...) below cannot unpack an
            # empty collection.
            continue

        keys, values = zip(*r.items())
        pos = list(range(len(r)))

        plt.figure()
        bars = plt.barh(pos, [x.fold for x in values])
        # NOTE(review): significance is judged by qvalue here, but by
        # pvalue in plotBarplot - confirm which one is intended.
        for b, v in zip(bars, values):
            if v.qvalue > 0.05:
                b.set_alpha(0.10)

        filename = buildPlotFilename(options, "bars-%s" % track)
        plt.yticks(pos, keys)
        plt.axvline(x=1, color="r")
        plt.savefig(filename)
        # release the figure - otherwise one figure per track stays
        # open for the lifetime of the process.
        plt.close()
|
|
|
135
|
|
|
def plotBarplot(annotator_results, options):
    '''output a single bar-plot containing all tracks.

    *annotator_results* is iterated as a mapping of track to a mapping
    of annotation to result; each result needs ``fold``, ``stddev``,
    ``expected`` and ``pvalue`` attributes. Within each annotation
    row, the tracks are drawn as adjacent horizontal bars of the fold
    values, with ``stddev / expected`` as error bar. The plot is saved
    to the file given by ``buildPlotFilename(options, "bars-all")``.

    Significant results are opaque, while non-significant results
    (``pvalue > 0.05``) are transparent. Nothing is plotted if there
    are no tracks.
    '''

    ntracks = len(annotator_results)
    if ntracks == 0:
        # no tracks: the bar height below would divide by zero.
        return

    # bars of all tracks share one unit of height per annotation
    height = 1.0 / float(ntracks)

    plt.figure()

    # NOTE(review): the tick labels are taken from the last non-empty
    # track - this assumes all tracks share the same annotations.
    keys = ()

    for trackid, track in enumerate(annotator_results):

        r = annotator_results[track]
        if not r:
            # zip(*...) below cannot unpack an empty collection
            continue

        # sorted() instead of items() + list.sort(): dictionary views
        # have no sort() method on Python 3.
        keys, values = zip(*sorted(r.items()))
        pos = numpy.arange(0, len(r), 1) + trackid * height
        bars = plt.barh(pos,
                        [x.fold for x in values],
                        height=height,
                        label=track,
                        # no error bar if the expectation is zero
                        xerr=[x.stddev / x.expected if x.expected else 0.0
                              for x in values],
                        color="bryg"[trackid % 4])
        for b, v in zip(bars, values):
            if v.pvalue > 0.05:
                b.set_alpha(0.10)

    plt.yticks(list(range(len(keys))), keys)
    plt.axvline(x=1, color="r")
    filename = buildPlotFilename(options, "bars-all")
    plt.legend()
    plt.savefig(filename)
    # release the figure once it has been written
    plt.close()
|
|
|
172
|
|
|
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads the results given by ``--results-file`` and creates the
    plots selected by ``--plots``:

    ``bars-per-track``
        one bar-plot per track (:func:`plotBarplots`)
    ``bars``
        a single bar-plot with all tracks (:func:`plotBarplot`)
    ``all``
        both of the above (default)

    Raises ImportError if matplotlib could not be imported and
    ValueError if no ``--output-plots-pattern`` was given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-l", "--sample-file", dest="sample_files", type="string", action="append",
                      help="filename with sample files. Start processing from samples [default=%default].")

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=("track", "annotation", "fold", "pvalue", "qvalue"),
                      help="order results in output by fold, track, etc. [default=%default].")

    parser.add_option("-p", "--pvalue-method", dest="pvalue_method", type="choice",
                      choices=("empirical", "norm", ),
                      help="type of pvalue reported [default=%default].")

    parser.add_option("--results-file", dest="input_filename_results", type="string",
                      help="start processing from results - no segments required [default=%default].")

    parser.add_option("--output-plots-pattern", dest="output_plots_pattern", type="string",
                      help="output pattern for plots [default=%default]")

    parser.add_option("--output-samples-pattern", dest="output_samples_pattern", type="string",
                      help="output pattern for samples. Samples are stored in bed format, one for "
                      " each segment [default=%default]")

    parser.add_option("--plots", dest="plots", type="choice",
                      choices=("all",
                               "bars-per-track",
                               "bars", ),
                      help="plots to be created [default=%default].")

    parser.set_defaults(
        sample_files=[],
        num_samples=1000,
        output_stats=[],
        output_filename_counts=None,
        output_order="fold",
        input_filename_results=None,
        pvalue_method="empirical",
        output_plots_pattern=None,
        # without a default, options.plots is None and the plot
        # selection below cannot be evaluated.
        plots="all",
    )

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    # fail early with a clear message instead of a NameError/TypeError
    # from deep inside the plotting functions.
    if not HASPLOT:
        raise ImportError("matplotlib could not be imported, but is required for plotting")
    if options.output_plots_pattern is None:
        raise ValueError("please supply --output-plots-pattern")

    annotator_results = IO.readAnnotatorResults(options.input_filename_results)

    # compare against the exact choices - a substring test would let
    # "bars" match "bars-per-track" as well.
    if options.plots in ("all", "bars-per-track"):
        plotBarplots(annotator_results, options)
    if options.plots in ("all", "bars"):
        plotBarplot(annotator_results, options)

    ## write footer and output benchmark information.
    E.Stop()
|
|
|
235
|
|
|
# run as a script: hand the command line to main() and use its return
# value as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
|