annotate rlGAT/gat-plot.py @ 11:53487f21c0d5 draft

Uploaded
author fubar
date Thu, 29 Aug 2013 01:57:54 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
1 ################################################################################
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
2 #
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
3 # MRC FGU Computational Genomics Group
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
4 #
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
5 # $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
6 #
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
7 # Copyright (C) 2009 Andreas Heger
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
8 #
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
9 # This program is free software; you can redistribute it and/or
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
10 # modify it under the terms of the GNU General Public License
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
11 # as published by the Free Software Foundation; either version 2
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
12 # of the License, or (at your option) any later version.
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
13 #
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
14 # This program is distributed in the hope that it will be useful,
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
17 # GNU General Public License for more details.
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
18 #
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
19 # You should have received a copy of the GNU General Public License
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
20 # along with this program; if not, write to the Free Software
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
22 #################################################################################
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
23 '''
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
24 gat-plot - plot results from a gat analysis
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
25 ===========================================
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
26
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
27 :Author: Andreas Heger
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
28 :Release: $Id$
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
29 :Date: |today|
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
30 :Tags: Python
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
31
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
32 Purpose
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
33 -------
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
34
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
35 This script takes the results of a ``gat-run.py`` or ``gat-compare.py``
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
36 and plots the results.
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
37
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
38 This script requires matplotlib.
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
39
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
40 Usage
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
41 -----
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
42
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
43 Example::
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
44
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
45 python gat-plot.py --results-file=gat.results.tsv.gz --output-plots-pattern=gat-%s.png
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
46 python gat-plot.py --input-filename-counts=gat.counts.tsv.gz
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
47
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
48 Type::
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
49
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
50 python gat-plot.py --help
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
51
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
52 for command line help.
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
53
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
54 Documentation
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
55 -------------
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
56
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
57 Code
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
58 ----
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
59
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
60 '''
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
61
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
62 import os, sys, re, optparse, collections, types, glob, time
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
63 import numpy
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
64
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
65 import gat
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
66 import gat.Experiment as E
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
67 import gat.IOTools as IOTools
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
68 import gat.IO as IO
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
69
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
70 try:
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
71 import matplotlib.pyplot as plt
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
72 HASPLOT = True
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
73 except (ImportError,RuntimeError):
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
74 HASPLOT = False
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
75
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
class DummyAnnotatorResult(object):
    '''Plain record holding one row of a tab-separated results table.

    A row consists of two text columns (track, annotation) followed by
    eight numeric columns: observed, expected, lower95, upper95, stddev,
    fold, pvalue and qvalue.
    '''

    # printf-style templates used by __str__ to render the numeric columns.
    format_observed = "%i"
    format_expected = "%6.4f"
    format_fold = "%6.4f"
    format_pvalue = "%6.4e"

    def __init__(self):
        pass

    @classmethod
    def _fromLine(cls, line):
        '''Build a result from one tab-separated text line.

        *line* may or may not end with a line terminator. All columns
        after the first two are converted with float(). Raises ValueError
        if the number of numeric columns is not exactly eight or a column
        is not a valid number.
        '''
        x = cls()
        # Strip only the line terminator. Blindly dropping the last
        # character would truncate the q-value on a final line that has
        # no trailing newline.
        data = line.rstrip("\r\n").split("\t")
        x.track, x.annotation = data[:2]
        (x.observed, x.expected, x.lower95, x.upper95,
         x.stddev, x.fold, x.pvalue, x.qvalue) = map(float, data[2:])
        return x

    def __str__(self):
        '''Render the record as a tab-separated line (no terminator).'''
        return "\t".join((self.track,
                          self.annotation,
                          self.format_observed % self.observed,
                          self.format_expected % self.expected,
                          self.format_expected % self.lower95,
                          self.format_expected % self.upper95,
                          self.format_expected % self.stddev,
                          self.format_fold % self.fold,
                          self.format_pvalue % self.pvalue,
                          self.format_pvalue % self.qvalue))
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
106
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
def buildPlotFilename(options, key):
    '''Return the output filename for the plot identified by *key*.

    Every ``%s`` in ``options.output_plots_pattern`` is replaced by *key*.
    Any character other than ASCII letters, digits, ``-``, ``_``, ``.``
    and ``/`` is then replaced by ``_``. If the resulting path has a
    directory component that does not exist yet, it is created.
    '''
    # Plain string substitution: re.sub() would treat backslashes and group
    # references inside *key* as replacement-template escapes.
    filename = options.output_plots_pattern.replace("%s", key)
    # '-' is placed last in the class so it is unambiguously a literal.
    filename = re.sub(r"[^a-zA-Z0-9_./-]", "_", filename)
    dirname = os.path.dirname(filename)
    if dirname and not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            # Tolerate the directory having been created concurrently
            # between the exists() check and makedirs().
            if not os.path.isdir(dirname):
                raise
    return filename
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
113
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
def plotBarplots(annotator_results, options):
    '''output a series of bar-plots, one figure per track.

    *annotator_results* is indexed by track; each entry is a mapping whose
    values expose ``fold`` and ``qvalue`` attributes. The keys of that
    mapping are used as y-axis labels.

    Bars with a q-value above 0.05 are drawn almost transparent, all
    others are opaque. A red vertical line marks a fold of 1. Each figure
    is saved to the filename built from ``options`` and ``bars-<track>``.
    Tracks without any results are skipped.
    '''
    for track in annotator_results:
        r = annotator_results[track]
        if not r:
            # nothing to draw; zip(*...) below could not be unpacked
            continue

        keys, values = zip(*r.items())
        pos = list(range(len(r)))

        plt.figure()
        bars = plt.barh(pos, [x.fold for x in values])
        # NOTE(review): this function fades bars on q-value while
        # plotBarplot fades on p-value - confirm which one is intended.
        for b, v in zip(bars, values):
            if v.qvalue > 0.05:
                b.set_alpha(0.10)

        filename = buildPlotFilename(options, "bars-%s" % track)
        plt.yticks(pos, keys)
        plt.axvline(x=1, color="r")
        plt.savefig(filename)
        # release the figure; otherwise one open figure accumulates per track
        plt.close()
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
135
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
def plotBarplot(annotator_results, options):
    '''output a single bar-plot combining all tracks.

    *annotator_results* is indexed by track; each entry is a mapping whose
    values expose ``fold``, ``stddev``, ``expected`` and ``pvalue``
    attributes. Within each track the entries are drawn in sorted key
    order, and the bars of the different tracks are stacked inside one
    unit-height slot per entry.

    Bars with a p-value above 0.05 are drawn almost transparent, all
    others are opaque. A red vertical line marks a fold of 1. The figure
    is saved to the filename built from ``options`` and ``bars-all``.
    Nothing is plotted if there are no tracks.
    '''
    ntracks = len(annotator_results)
    if ntracks == 0:
        # avoids a division by zero below; there is nothing to plot
        return

    # each track gets an equal share of the unit-height slot
    height = 1.0 / float(ntracks)

    plt.figure()

    keys = ()
    for trackid, track in enumerate(annotator_results):
        r = annotator_results[track]
        if not r:
            continue

        # sorted() works on both list and view results of items()
        keys, values = zip(*sorted(r.items()))
        pos = numpy.arange(0, len(r), 1) + trackid * height
        bars = plt.barh(pos,
                        [x.fold for x in values],
                        height=height,
                        label=track,
                        # error bar is stddev relative to the expected value;
                        # fall back to 0 when expected is 0
                        xerr=[x.stddev / x.expected if x.expected else 0.0
                              for x in values],
                        color="bryg"[trackid % 4])
        for b, v in zip(bars, values):
            if v.pvalue > 0.05:
                b.set_alpha(0.10)

    # Tick labels come from the last non-empty track.
    # NOTE(review): assumes all tracks share the same keys - confirm.
    plt.yticks(list(range(len(keys))), keys)
    plt.axvline(x=1, color="r")
    filename = buildPlotFilename(options, "bars-all")
    plt.legend()
    plt.savefig(filename)
    # release the figure once it has been written
    plt.close()
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
172
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads the results table named by ``--results-file`` and writes the
    plots selected with ``--plots`` (default: all) to files named after
    ``--output-plots-pattern``.

    Raises ImportError if matplotlib could not be imported and ValueError
    if the results file or the plot pattern is missing.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-l", "--sample-file", dest="sample_files", type="string", action="append",
                      help="filename with sample files. Start processing from samples [default=%default].")

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=("track", "annotation", "fold", "pvalue", "qvalue"),
                      help="order results in output by fold, track, etc. [default=%default].")

    parser.add_option("-p", "--pvalue-method", dest="pvalue_method", type="choice",
                      choices=("empirical", "norm",),
                      help="type of pvalue reported [default=%default].")

    parser.add_option("--results-file", dest="input_filename_results", type="string",
                      help="start processing from results - no segments required [default=%default].")

    parser.add_option("--output-plots-pattern", dest="output_plots_pattern", type="string",
                      help="output pattern for plots [default=%default]")

    parser.add_option("--output-samples-pattern", dest="output_samples_pattern", type="string",
                      help="output pattern for samples. Samples are stored in bed format, one for "
                      " each segment [default=%default]")

    parser.add_option("--plots", dest="plots", type="choice",
                      choices=("all",
                               "bars-per-track",
                               "bars",),
                      help="plots to be created [default=%default].")

    parser.set_defaults(
        sample_files=[],
        num_samples=1000,
        output_stats=[],
        output_filename_counts=None,
        output_order="fold",
        input_filename_results=None,
        pvalue_method="empirical",
        output_plots_pattern=None,
        # without a default, options.plots is None and selecting plots fails
        plots="all",
    )

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    # Validate after parsing so that --help works without matplotlib.
    if not HASPLOT:
        raise ImportError("gat-plot requires matplotlib, which could not be imported")
    if options.input_filename_results is None:
        raise ValueError("please supply a results file with --results-file")
    if options.output_plots_pattern is None:
        raise ValueError("please supply a filename pattern with --output-plots-pattern")

    annotator_results = IO.readAnnotatorResults(options.input_filename_results)

    # Compare against the declared choices exactly. A substring test would
    # make "bars-per-track" also select the combined "bars" plot.
    if options.plots in ("all", "bars-per-track"):
        plotBarplots(annotator_results, options)
    if options.plots in ("all", "bars"):
        plotBarplot(annotator_results, options)

    ## write footer and output benchmark information.
    E.Stop()
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
235
53487f21c0d5 Uploaded
fubar
parents:
diff changeset
# Run main() only when executed as a script and hand its return value
# to the interpreter as the exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)