comparison rlGAT/gat-plot.py @ 11:53487f21c0d5 draft

Uploaded
author fubar
date Thu, 29 Aug 2013 01:57:54 -0400
parents
children
comparison
equal deleted inserted replaced
10:f04dfb37d1bb 11:53487f21c0d5
1 ################################################################################
2 #
3 # MRC FGU Computational Genomics Group
4 #
5 # $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $
6 #
7 # Copyright (C) 2009 Andreas Heger
8 #
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # as published by the Free Software Foundation; either version 2
12 # of the License, or (at your option) any later version.
13 #
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 #################################################################################
23 '''
24 gat-plot - plot results from a gat analysis
25 ===========================================
26
27 :Author: Andreas Heger
28 :Release: $Id$
29 :Date: |today|
30 :Tags: Python
31
32 Purpose
33 -------
34
35 This script takes the results of a ``gat-run.py`` or ``gat-compare.py``
36 and plots the results.
37
38 This script requires matplotlib.
39
40 Usage
41 -----
42
43 Example::
44
45 python gat-plot.py --input-filename-results=gat.results.tsv.gz
46 python gat-plot.py --input-filename-counts=gat.counts.tsv.gz
47
48 Type::
49
50 python gat-plot.py --help
51
52 for command line help.
53
54 Documentation
55 -------------
56
57 Code
58 ----
59
60 '''
61
62 import os, sys, re, optparse, collections, types, glob, time
63 import numpy
64
65 import gat
66 import gat.Experiment as E
67 import gat.IOTools as IOTools
68 import gat.IO as IO
69
70 try:
71 import matplotlib.pyplot as plt
72 HASPLOT = True
73 except (ImportError,RuntimeError):
74 HASPLOT = False
75
class DummyAnnotatorResult:
    '''Plain record holding one row of a tab-separated results table.

    A row has ten columns: ``track`` and ``annotation`` (kept as strings)
    followed by eight numeric columns that are parsed as floats:
    ``observed``, ``expected``, ``lower95``, ``upper95``, ``stddev``,
    ``fold``, ``pvalue`` and ``qvalue``.

    ``str(result)`` renders the row again as a tab-separated line (without
    a trailing newline) using the ``format_*`` templates below.
    '''

    # printf-style templates used by __str__; format_expected is shared by
    # the expected, lower95, upper95 and stddev columns.
    format_observed = "%i"
    format_expected = "%6.4f"
    format_fold = "%6.4f"
    format_pvalue = "%6.4e"

    def __init__( self ):
        pass

    @classmethod
    def _fromLine( cls, line ):
        '''build a result from one tab-separated *line*.

        A trailing line terminator is optional. Raises :class:`ValueError`
        if the line does not contain exactly ten columns or if a numeric
        column can not be converted to float.
        '''
        x = cls()
        # Strip only line terminators. Slicing off the last character
        # unconditionally would truncate the q-value of a line that has
        # no trailing newline (e.g. the last line of a file).
        data = line.rstrip( "\r\n" ).split( "\t" )
        x.track, x.annotation = data[:2]
        ( x.observed, x.expected, x.lower95, x.upper95,
          x.stddev, x.fold, x.pvalue, x.qvalue ) = map( float, data[2:] )
        return x

    def __str__( self ):
        return "\t".join( ( self.track,
                            self.annotation,
                            self.format_observed % self.observed,
                            self.format_expected % self.expected,
                            self.format_expected % self.lower95,
                            self.format_expected % self.upper95,
                            self.format_expected % self.stddev,
                            self.format_fold % self.fold,
                            self.format_pvalue % self.pvalue,
                            self.format_pvalue % self.qvalue ) )
106
def buildPlotFilename( options, key ):
    '''return the output filename for the plot identified by *key*.

    Every ``%s`` in ``options.output_plots_pattern`` is replaced by *key*.
    Characters other than ASCII letters, digits and ``-_./`` are then
    replaced by ``_``. If the resulting filename has a directory part that
    does not exist yet, the directory is created.
    '''
    # Use a function as replacement so that *key* is inserted literally.
    # A replacement *string* is treated as a template by re.sub: a
    # backslash in a track name would be expanded as an escape or raise
    # an "invalid group reference" error.
    filename = re.sub( "%s", lambda match: key, options.output_plots_pattern )
    filename = re.sub( "[^a-zA-Z0-9-_./]", "_", filename )
    dirname = os.path.dirname( filename )
    if dirname and not os.path.exists( dirname ):
        os.makedirs( dirname )
    return filename
113
def plotBarplots( annotator_results, options ):
    '''output a series of bar-plots, one per track.

    *annotator_results* maps each track to a mapping of annotation to
    result; the ``fold`` of each result is drawn as a horizontal bar and a
    red vertical line marks a fold of 1.

    Results with a q-value of at most 0.05 are opaque, while all other
    results are drawn almost transparent.

    Each plot is saved to the filename that :func:`buildPlotFilename`
    returns for the key ``bars-<track>``. Tracks without any results are
    skipped.
    '''

    for track in annotator_results:
        r = annotator_results[track]
        # nothing to draw - unpacking zip(*...) of an empty mapping would fail
        if not r: continue

        keys, values = zip( *r.items() )
        pos = list( range( len(r) ) )

        fig = plt.figure()
        bars = plt.barh( pos, [x.fold for x in values] )
        for b, v in zip( bars, values ):
            if v.qvalue > 0.05: b.set_alpha( 0.10 )

        plt.yticks( pos, keys )
        plt.axvline( x=1, color="r" )
        plt.savefig( buildPlotFilename( options, "bars-%s" % track ) )
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so that memory does not grow with the number of tracks.
        plt.close( fig )
135
def plotBarplot( annotator_results, options ):
    '''output a single bar-plot containing all tracks.

    *annotator_results* maps each track to a mapping of annotation to
    result. Annotations are sorted and each gets one row in the plot;
    within a row, every track gets its own bar showing the ``fold`` with
    ``stddev / expected`` as error bar. A red vertical line marks a fold
    of 1.

    Results with a p-value of at most 0.05 are opaque, while all other
    results are drawn almost transparent.

    The plot is saved to the filename that :func:`buildPlotFilename`
    returns for the key ``bars-all``. Nothing is plotted if there are no
    results at all.
    '''

    # tracks without results have nothing to draw
    tracks = [ t for t in annotator_results if annotator_results[t] ]
    if not tracks: return

    # bars of all tracks share the unit-height row of an annotation
    height = 1.0 / float( len(tracks) )

    # Assign rows from the union of all annotations, so that bars and tick
    # labels stay aligned even if tracks differ in their annotations. With
    # identical annotation sets this equals the sorted per-track order.
    annotations = sorted( set( a for t in tracks for a in annotator_results[t] ) )
    row = dict( (a, i) for i, a in enumerate( annotations ) )

    fig = plt.figure()

    for trackid, track in enumerate( tracks ):

        r = annotator_results[track]
        keys = sorted( r )
        values = [ r[k] for k in keys ]
        pos = numpy.array( [ row[k] for k in keys ], dtype = float ) + trackid * height
        bars = plt.barh( pos,
                         [x.fold for x in values],
                         height = height,
                         label = track,
                         # no error bar if the expected value is 0
                         xerr = [ x.stddev / x.expected if x.expected else 0.0
                                  for x in values ],
                         color = "bryg"[trackid % 4] )
        for b, v in zip( bars, values ):
            if v.pvalue > 0.05: b.set_alpha( 0.10 )

    plt.yticks( list( range( len(annotations) ) ), annotations )
    plt.axvline( x=1, color = "r" )
    filename = buildPlotFilename( options, "bars-all" )
    plt.legend()
    plt.savefig( filename )
    # release the figure - pyplot keeps it alive until closed explicitly
    plt.close( fig )
172
def main( argv = None ):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads the results table given by ``--results-file`` and creates the
    plots selected with ``--plots``:

    ``bars-per-track``
        one bar-plot per track (:func:`plotBarplots`)
    ``bars``
        a single bar-plot with all tracks (:func:`plotBarplot`)
    ``all``
        both of the above (default)

    Raises ImportError if matplotlib could not be imported and ValueError
    if no ``--output-plots-pattern`` has been supplied.
    """

    if not argv: argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser( version = "%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                                    usage = globals()["__doc__"] )

    parser.add_option("-l", "--sample-file", dest="sample_files", type="string", action="append",
                      help="filename with sample files. Start processing from samples [default=%default]." )

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices = ( "track", "annotation", "fold", "pvalue", "qvalue" ),
                      help="order results in output by fold, track, etc. [default=%default]." )

    parser.add_option("-p", "--pvalue-method", dest="pvalue_method", type="choice",
                      choices = ( "empirical", "norm", ),
                      help="type of pvalue reported [default=%default]." )

    # --input-filename-results is the spelling used in the usage examples
    # of the module documentation; accept it as an alias.
    parser.add_option( "--results-file", "--input-filename-results",
                       dest="input_filename_results", type="string",
                       help="start processing from results - no segments required [default=%default]." )

    parser.add_option( "--output-plots-pattern", dest="output_plots_pattern", type="string",
                       help="output pattern for plots [default=%default]" )

    parser.add_option( "--output-samples-pattern", dest="output_samples_pattern", type="string",
                       help="output pattern for samples. Samples are stored in bed format, one for "
                       " each segment [default=%default]" )

    parser.add_option( "--plots", dest="plots", type="choice",
                       choices = ( "all",
                                   "bars-per-track",
                                   "bars", ),
                       help="plots to be created [default=%default]." )

    parser.set_defaults(
        sample_files = [],
        num_samples = 1000,
        output_stats = [],
        output_filename_counts = None,
        output_order = "fold",
        input_filename_results = None,
        pvalue_method = "empirical",
        output_plots_pattern = None,
        # without a default, options.plots is None and the tests below fail
        plots = "all",
        )

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start( parser, argv = argv, add_output_options = True )

    # fail early and with a clear message instead of a NameError/TypeError
    # from deep inside the plotting functions
    if not HASPLOT:
        raise ImportError( "gat-plot.py requires matplotlib, which could not be imported" )
    if options.output_plots_pattern is None:
        raise ValueError( "please supply --output-plots-pattern, for example 'plots/%s.png'" )

    annotator_results = IO.readAnnotatorResults( options.input_filename_results )

    # Compare against the full choice names. A substring test such as
    # '"bars" in options.plots' also matches "bars-per-track" and never
    # matches "all".
    if options.plots in ( "all", "bars-per-track" ):
        plotBarplots( annotator_results, options )
    if options.plots in ( "all", "bars" ):
        plotBarplot( annotator_results, options )

    ## write footer and output benchmark information.
    E.Stop()
235
# Run the script when executed directly; the return value of main()
# becomes the exit status of the process.
if __name__ == "__main__":
    exit_status = main( sys.argv )
    sys.exit( exit_status )