Mercurial > repos > fubar > genomic_association_tester
comparison rlGAT/gat-plot.py @ 11:53487f21c0d5 draft
Uploaded
| author | fubar |
|---|---|
| date | Thu, 29 Aug 2013 01:57:54 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 10:f04dfb37d1bb | 11:53487f21c0d5 |
|---|---|
| 1 ################################################################################ | |
| 2 # | |
| 3 # MRC FGU Computational Genomics Group | |
| 4 # | |
| 5 # $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $ | |
| 6 # | |
| 7 # Copyright (C) 2009 Andreas Heger | |
| 8 # | |
| 9 # This program is free software; you can redistribute it and/or | |
| 10 # modify it under the terms of the GNU General Public License | |
| 11 # as published by the Free Software Foundation; either version 2 | |
| 12 # of the License, or (at your option) any later version. | |
| 13 # | |
| 14 # This program is distributed in the hope that it will be useful, | |
| 15 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 17 # GNU General Public License for more details. | |
| 18 # | |
| 19 # You should have received a copy of the GNU General Public License | |
| 20 # along with this program; if not, write to the Free Software | |
| 21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
| 22 ################################################################################# | |
| 23 ''' | |
| 24 gat-plot - plot results from a gat analysis | |
| 25 =========================================== | |
| 26 | |
| 27 :Author: Andreas Heger | |
| 28 :Release: $Id$ | |
| 29 :Date: |today| | |
| 30 :Tags: Python | |
| 31 | |
| 32 Purpose | |
| 33 ------- | |
| 34 | |
| 35 This script takes the results of a ``gat-run.py`` or ``gat-compare.py`` | |
| 36 and plots the results. | |
| 37 | |
| 38 This script requires matplotlib. | |
| 39 | |
| 40 Usage | |
| 41 ----- | |
| 42 | |
| 43 Example:: | |
| 44 | |
| 45 python gat-plot.py --results-file=gat.results.tsv.gz | |
| 46 python gat-plot.py --input-filename-counts=gat.counts.tsv.gz | |
| 47 | |
| 48 Type:: | |
| 49 | |
| 50 python gat-plot.py --help | |
| 51 | |
| 52 for command line help. | |
| 53 | |
| 54 Documentation | |
| 55 ------------- | |
| 56 | |
| 57 Code | |
| 58 ---- | |
| 59 | |
| 60 ''' | |
| 61 | |
| 62 import os, sys, re, optparse, collections, types, glob, time | |
| 63 import numpy | |
| 64 | |
| 65 import gat | |
| 66 import gat.Experiment as E | |
| 67 import gat.IOTools as IOTools | |
| 68 import gat.IO as IO | |
| 69 | |
| 70 try: | |
| 71 import matplotlib.pyplot as plt | |
| 72 HASPLOT = True | |
| 73 except (ImportError,RuntimeError): | |
| 74 HASPLOT = False | |
| 75 | |
class DummyAnnotatorResult:
    '''Plain record for one row of a tab-separated results table.

    A row consists of two text columns (track, annotation) followed by
    eight numeric columns: observed, expected, lower95, upper95, stddev,
    fold, pvalue and qvalue.  The ``format_*`` class attributes are the
    printf-style formats used by :meth:`__str__`.
    '''

    format_observed = "%i"
    format_expected = "%6.4f"
    format_fold = "%6.4f"
    format_pvalue = "%6.4e"

    def __init__(self):
        pass

    @classmethod
    def _fromLine(cls, line):
        '''Build a result from one tab-separated *line*.

        A trailing line terminator is optional.  Raises ``ValueError`` if
        the line does not hold exactly two text and eight numeric columns,
        or if a numeric column can not be converted to float.
        '''
        result = cls()
        # Strip only line terminators: blindly dropping the last character
        # would truncate the q-value of a line that has no trailing newline.
        data = line.rstrip("\r\n").split("\t")
        result.track, result.annotation = data[:2]
        (result.observed, result.expected,
         result.lower95, result.upper95,
         result.stddev, result.fold,
         result.pvalue, result.qvalue) = map(float, data[2:])
        return result

    def __str__(self):
        '''Return the record as a tab-separated line (no newline).'''
        return "\t".join((self.track,
                          self.annotation,
                          self.format_observed % self.observed,
                          self.format_expected % self.expected,
                          self.format_expected % self.lower95,
                          self.format_expected % self.upper95,
                          self.format_expected % self.stddev,
                          self.format_fold % self.fold,
                          self.format_pvalue % self.pvalue,
                          self.format_pvalue % self.qvalue))
| 106 | |
def buildPlotFilename(options, key):
    '''Return the output filename for the plot identified by *key*.

    Every ``%s`` in ``options.output_plots_pattern`` is replaced by *key*;
    afterwards each character outside ``a-z A-Z 0-9 - _ . /`` is replaced
    by an underscore.  The directory part of the resulting filename is
    created if it does not exist yet.

    Raises ``ValueError`` if no output pattern has been set.
    '''
    pattern = options.output_plots_pattern
    if pattern is None:
        raise ValueError(
            "no output pattern for plots given (see --output-plots-pattern)")

    # Literal substitution: with re.sub() the key would be interpreted as a
    # replacement template, so a backslash in a track name would either
    # raise or be expanded as an escape/group reference.
    filename = pattern.replace("%s", key)
    filename = re.sub(r"[^a-zA-Z0-9_./-]", "_", filename)

    dirname = os.path.dirname(filename)
    if dirname and not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            # Another process may have created the directory in between
            # the existence check and makedirs(); only re-raise real errors.
            if not os.path.isdir(dirname):
                raise
    return filename
| 113 | |
def plotBarplots(annotator_results, options):
    '''output a series of bar-plots, one for each track.

    *annotator_results* maps a track to a mapping of annotation to result;
    each result provides ``fold`` and ``qvalue``.  For every track a
    horizontal bar-plot of the fold values is saved to the filename
    obtained from ``buildPlotFilename(options, "bars-<track>")``.

    Significant results are opaque, while non-significant results
    (qvalue > 0.05) are transparent.  Tracks without results are skipped.
    '''

    for track in annotator_results:
        r = annotator_results[track]
        if not r:
            # zip(*...) below can not unpack an empty result set
            continue

        fig = plt.figure()
        try:
            keys, values = zip(*r.items())
            pos = list(range(len(r)))
            bars = plt.barh(pos, [x.fold for x in values])
            for b, v in zip(bars, values):
                if v.qvalue > 0.05:
                    b.set_alpha(0.10)

            filename = buildPlotFilename(options, "bars-%s" % track)
            plt.yticks(pos, keys)
            # reference line at fold = 1
            plt.axvline(x=1, color="r")
            plt.savefig(filename)
        finally:
            # Figures stay registered with pyplot until closed explicitly;
            # without this every track would leak one open figure.
            plt.close(fig)
| 135 | |
def plotBarplot(annotator_results, options):
    '''output a single bar-plot containing all tracks.

    *annotator_results* maps a track to a mapping of annotation to result;
    each result provides ``fold``, ``stddev``, ``expected`` and ``pvalue``.
    The bars of the different tracks are drawn side by side within each
    annotation row, with ``stddev / expected`` as error bar.  The plot is
    saved to ``buildPlotFilename(options, "bars-all")``.

    Significant results are opaque, while non-significant results
    (pvalue > 0.05) are transparent.  Nothing is plotted if there are no
    tracks; tracks without results are skipped.

    The y tick labels are taken from the last track that was plotted.
    NOTE(review): this presumably assumes that all tracks share the same
    set of annotations - confirm against the caller.
    '''

    ntracks = len(annotator_results)
    if ntracks == 0:
        # nothing to plot - also avoids a division by zero below
        return

    # each annotation row has unit height, shared equally by all tracks
    height = 1.0 / float(ntracks)

    fig = plt.figure()
    try:
        keys = ()
        for trackid, track in enumerate(annotator_results):

            r = annotator_results[track]
            if not r:
                continue

            # sorted() instead of items() + list.sort(): items() is not a
            # list on python 3
            keys, values = zip(*sorted(r.items()))
            pos = numpy.arange(0, len(r), 1) + trackid * height
            bars = plt.barh(pos,
                            [x.fold for x in values],
                            height=height,
                            label=track,
                            # no error bar if the expectation is zero
                            xerr=[x.stddev / x.expected if x.expected else 0.0
                                  for x in values],
                            color="bryg"[trackid % 4])
            for b, v in zip(bars, values):
                if v.pvalue > 0.05:
                    b.set_alpha(0.10)

        plt.yticks(list(range(len(keys))), keys)
        # reference line at fold = 1
        plt.axvline(x=1, color="r")
        filename = buildPlotFilename(options, "bars-all")
        plt.legend()
        plt.savefig(filename)
    finally:
        # release the figure, pyplot keeps it alive otherwise
        plt.close(fig)
| 172 | |
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Reads annotator results from the file given by ``--results-file`` and
    creates the plots selected with ``--plots``:

    ``bars-per-track``
        one bar-plot per track (:func:`plotBarplots`)
    ``bars``
        a single bar-plot with all tracks (:func:`plotBarplot`)
    ``all``
        both of the above (default)

    Raises ``ImportError`` if matplotlib is not available and
    ``ValueError`` if no ``--output-plots-pattern`` has been given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                                   usage=globals()["__doc__"])

    parser.add_option("-l", "--sample-file", dest="sample_files", type="string", action="append",
                      help="filename with sample files. Start processing from samples [default=%default].")

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=("track", "annotation", "fold", "pvalue", "qvalue"),
                      help="order results in output by fold, track, etc. [default=%default].")

    parser.add_option("-p", "--pvalue-method", dest="pvalue_method", type="choice",
                      choices=("empirical", "norm",),
                      help="type of pvalue reported [default=%default].")

    parser.add_option("--results-file", dest="input_filename_results", type="string",
                      help="start processing from results - no segments required [default=%default].")

    parser.add_option("--output-plots-pattern", dest="output_plots_pattern", type="string",
                      help="output pattern for plots [default=%default]")

    parser.add_option("--output-samples-pattern", dest="output_samples_pattern", type="string",
                      help="output pattern for samples. Samples are stored in bed format, one for "
                      " each segment [default=%default]")

    parser.add_option("--plots", dest="plots", type="choice",
                      choices=("all",
                               "bars-per-track",
                               "bars",),
                      help="plots to be created [default=%default].")

    parser.set_defaults(
        sample_files=[],
        num_samples=1000,
        output_stats=[],
        output_filename_counts=None,
        output_order="fold",
        input_filename_results=None,
        pvalue_method="empirical",
        output_plots_pattern=None,
        # without a default, omitting --plots left options.plots as None
        plots="all",
    )

    ## add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    # Fail early with a clear message instead of a NameError on ``plt``
    # or a TypeError deep inside the filename construction.
    if not HASPLOT:
        raise ImportError("matplotlib could not be imported, but is required for plotting")
    if options.output_plots_pattern is None:
        raise ValueError("please supply an output pattern for plots (--output-plots-pattern)")

    annotator_results = IO.readAnnotatorResults(options.input_filename_results)

    # Compare against the exact choice names: a substring test such as
    # ``"bars" in options.plots`` is also true for "bars-per-track".
    if options.plots in ("all", "bars-per-track"):
        plotBarplots(annotator_results, options)
    if options.plots in ("all", "bars"):
        plotBarplot(annotator_results, options)

    ## write footer and output benchmark information.
    E.Stop()
| 235 | |
# Run main() only when executed as a script; its return value becomes the
# exit status of the process.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
