Mercurial > repos > fubar > genomic_association_tester
diff rlGAT/gat-plot.py @ 11:53487f21c0d5 draft
Uploaded
| author | fubar |
|---|---|
| date | Thu, 29 Aug 2013 01:57:54 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rlGAT/gat-plot.py Thu Aug 29 01:57:54 2013 -0400 @@ -0,0 +1,237 @@ +################################################################################ +# +# MRC FGU Computational Genomics Group +# +# $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $ +# +# Copyright (C) 2009 Andreas Heger +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +################################################################################# +''' +gat-plot - plot results from a gat analysis +=========================================== + +:Author: Andreas Heger +:Release: $Id$ +:Date: |today| +:Tags: Python + +Purpose +------- + +This script takes the results of a ``gat-run.py` or ``gat-compare.py`` +and plots the results. + +This script requires matplotlib. + +Usage +----- + +Example:: + + python gat-plot.py --input-filename-results=gat.results.tsv.gz + python gat-plot.py --input-filename-counts=gat.counts.tsv.gz + +Type:: + + python gatplot.py --help + +for command line help. + +Documentation +------------- + +Code +---- + +''' + +import os, sys, re, optparse, collections, types, glob, time +import numpy + +import gat +import gat.Experiment as E +import gat.IOTools as IOTools +import gat.IO as IO + +try: + import matplotlib.pyplot as plt + HASPLOT = True +except (ImportError,RuntimeError): + HASPLOT = False + +class DummyAnnotatorResult: + + format_observed = "%i" + format_expected = "%6.4f" + format_fold = "%6.4f" + format_pvalue = "%6.4e" + + def __init__( self ): + pass + + @classmethod + def _fromLine( cls, line ): + x = cls() + data = line[:-1].split("\t") + x.track, x.annotation = data[:2] + x.observed, x.expected, x.lower95, x.upper95, x.stddev, x.fold, x.pvalue, x.qvalue = \ + map(float, data[2:] ) + return x + + def __str__(self): + return "\t".join( (self.track, + self.annotation, + self.format_observed % self.observed, + self.format_expected % self.expected, + self.format_expected % self.lower95, + self.format_expected % self.upper95, + self.format_expected % self.stddev, + self.format_fold % self.fold, + self.format_pvalue % self.pvalue, + self.format_pvalue % self.qvalue ) ) + +def buildPlotFilename( options, key ): + filename = re.sub("%s", key, options.output_plots_pattern) + filename = re.sub("[^a-zA-Z0-9-_./]", "_", filename ) + dirname = os.path.dirname( filename ) + if dirname and not os.path.exists( dirname ): os.makedirs( dirname ) + return filename + +def plotBarplots( annotator_results, options ): + '''output a series of bar-plots. + + Output for each track. + + Significant results are opaque, while + non-significant results are transparent.''' + + for track in annotator_results: + plt.figure() + r = annotator_results[track] + keys, values = zip( *r.items()) + pos = range(len(r)) + bars = plt.barh( pos, [x.fold for x in values] ) + for b,v in zip(bars, values): + if v.qvalue > 0.05: b.set_alpha( 0.10 ) + + filename = buildPlotFilename( options, "bars-%s" % track ) + plt.yticks( pos, keys ) + plt.axvline( x=1, color="r") + plt.savefig( filename ) + +def plotBarplot( annotator_results, options ): + '''output a single bar-plots. + + Output for each track. + + Significant results are opaque, while + non-significant results are transparent.''' + + ntracks = len(annotator_results ) + height = 1.0 / float(ntracks) + + plt.figure() + + for trackid, track in enumerate(annotator_results): + + r = annotator_results[track] + rr = r.items() + rr.sort() + keys, values = zip(*rr) + pos = numpy.arange(0,len(r),1) + trackid * height + bars = plt.barh( pos, + [x.fold for x in values], + height = height, + label = track, + xerr = [x.stddev / x.expected for x in values], + color = "bryg"[trackid % 4]) + for b,v in zip(bars, values): + if v.pvalue > 0.05: b.set_alpha( 0.10 ) + + pos = range(len(r)) + + plt.yticks( pos, keys ) + plt.axvline(x=1, color = "r" ) + filename = buildPlotFilename( options, "bars-all" ) + plt.legend() + plt.savefig( filename ) + +def main( argv = None ): + """script main. + + parses command line options in sys.argv, unless *argv* is given. + """ + + if not argv: argv = sys.argv + + # setup command line parser + parser = optparse.OptionParser( version = "%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $", + usage = globals()["__doc__"] ) + + parser.add_option("-l", "--sample-file", dest="sample_files", type="string", action="append", + help="filename with sample files. Start processing from samples [default=%default]." ) + + parser.add_option("-o", "--order", dest="output_order", type="choice", + choices = ( "track", "annotation", "fold", "pvalue", "qvalue" ), + help="order results in output by fold, track, etc. [default=%default]." ) + + parser.add_option("-p", "--pvalue-method", dest="pvalue_method", type="choice", + choices = ( "empirical", "norm", ), + help="type of pvalue reported [default=%default]." ) + + parser.add_option( "--results-file", dest="input_filename_results", type="string", + help="start processing from results - no segments required [default=%default]." ) + + parser.add_option( "--output-plots-pattern", dest="output_plots_pattern", type="string", + help="output pattern for plots [default=%default]" ) + + parser.add_option( "--output-samples-pattern", dest="output_samples_pattern", type="string", + help="output pattern for samples. Samples are stored in bed format, one for " + " each segment [default=%default]" ) + + parser.add_option( "--plots", dest="plots", type="choice", + choices = ( "all", + "bars-per-track", + "bars", ), + help="plots to be created [default=%default]." ) + + parser.set_defaults( + sample_files = [], + num_samples = 1000, + output_stats = [], + output_filename_counts = None, + output_order = "fold", + input_filename_results = None, + pvalue_method = "empirical", + output_plots_pattern = None, + ) + + ## add common options (-h/--help, ...) and parse command line + (options, args) = E.Start( parser, argv = argv, add_output_options = True ) + + annotator_results = IO.readAnnotatorResults( options.input_filename_results ) + + if "speparate-bars" in options.plots: + plotBarplots( annotator_results, options ) + if "bars" in options.plots: + plotBarplot( annotator_results, options ) + + ## write footer and output benchmark information. + E.Stop() + +if __name__ == "__main__": + sys.exit( main( sys.argv) )
