Mercurial > repos > fubar > genomic_association_tester
view rlGAT/gat-run.py @ 11:53487f21c0d5 draft
Uploaded
| author | fubar |
|---|---|
| date | Thu, 29 Aug 2013 01:57:54 -0400 |
| parents | |
| children |
line wrap: on
line source
################################################################################ # # MRC FGU Computational Genomics Group # # $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $ # # Copyright (C) 2009 Andreas Heger # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ################################################################################# ''' gat-run - run the genomic annotation tool ============================================= :Author: Andreas Heger :Release: $Id$ :Date: |today| :Tags: Python Purpose ------- This script compares one or more genomic segments of interest against one more other genomic annotations. Usage ----- Example:: python gat-run.py --segment-file=segments.bed.gz --workspace-file=workspace.bed.gz --annotation-file=annotations_architecture.bed.gz Type:: python gat-run.py --help for command line help. Documentation ------------- Code ---- ''' import os, sys, re, optparse, collections, types, glob, time import numpy import gat import gat.Experiment as E import gat.IOTools as IOTools import gat.IO as IO import gat.Stats as Stats import GatSegmentList import GatEngine def fromSegments( options, args ): '''run analysis from segment files. This is the most common use case. ''' tstart = time.time() ################################################## ################################################## ################################################## ## build segments segments, annotations, workspaces, isochores = IO.buildSegments( options ) E.info( "intervals loaded in %i seconds" % (time.time() - tstart) ) ################################################## ################################################## ################################################## ## open various additional output files ################################################## outfiles = {} for section in ("sample", "segment_metrics", "sample_metrics", ): if section in options.output_stats or \ "all" in options.output_stats or \ len( [ x for x in options.output_stats if re.search( x, "section" ) ] ) > 0: outfiles[section] = E.openOutputFile(section) if 'sample_metrics' in outfiles: outfiles['sample_metrics'].write( "track\tsection\tmetric\t%s\n" % "\t".join(Stats.Summary().getHeaders() )) # filter segments by workspace workspace = IO.applyIsochores( segments, annotations, workspaces, options, isochores, truncate_segments_to_workspace = options.truncate_segments_to_workspace, truncate_workspace_to_annotations = options.truncate_workspace_to_annotations, restrict_workspace = options.restrict_workspace ) ################################################## ################################################## ################################################## ## check memory requirements counts = segments.countsPerTrack() max_counts = max(counts.values()) # previous algorithm: memory requirements if all samples are stored memory = 8 * 2 * options.num_samples * max_counts * len(workspace) ################################################## ################################################## ################################################## # initialize sampler if options.sampler == "annotator": sampler = GatEngine.SamplerAnnotator( bucket_size = options.bucket_size, nbuckets = options.nbuckets ) elif options.sampler == "shift": sampler = GatEngine.SamplerShift( radius = options.shift_expansion, extension = options.shift_extension ) elif options.sampler == "segments": sampler = GatEngine.SamplerSegments() elif options.sampler == "local-permutation": sampler = GatEngine.SamplerLocalPermutation() elif options.sampler == "global-permutation": sampler = GatEngine.SamplerGlobalPermutation() elif options.sampler == "brute-force": sampler = GatEngine.SamplerBruteForce() elif options.sampler == "uniform": sampler = GatEngine.SamplerUniform() ################################################## ################################################## ################################################## # initialize counter counters = [] for counter in options.counters: if counter == "nucleotide-overlap": counters.append( GatEngine.CounterNucleotideOverlap() ) elif counter == "nucleotide-density": counters.append( GatEngine.CounterNucleotideDensity() ) elif counter == "segment-overlap": counters.append( GatEngine.CounterSegmentOverlap() ) elif counter == "annotations-overlap": counters.append( GatEngine.CounterAnnotationsOverlap() ) elif counter == "segment-midoverlap": counters.append( GatEngine.CounterSegmentMidpointOverlap() ) elif counter == "annotations-midoverlap": counters.append( GatEngine.CounterAnnotationsMidpointOverlap() ) else: raise ValueError("unknown counter '%s'" % counter ) ################################################## ################################################## ################################################## ## initialize workspace generator if options.conditional == "unconditional": workspace_generator = GatEngine.UnconditionalWorkspace() elif options.conditional == "cooccurance": workspace_generator = GatEngine.ConditionalWorkspaceCooccurance() elif options.conditional == "annotation-centered": if options.conditional_extension == options.conditional_expansion == None: raise ValueError( "please specify either --conditional-expansion or --conditional-extension" ) workspace_generator = GatEngine.ConditionalWorkspaceAnnotationCentered( options.conditional_extension, options.conditional_expansion ) elif options.conditional == "segment-centered": if options.conditional_extension == options.conditional_expansion == None: raise ValueError( "please specify either --conditional-expansion or --conditional-extension" ) workspace_generator = GatEngine.ConditionalWorkspaceSegmentCentered( options.conditional_extension, options.conditional_expansion ) else: raise ValueError("unknown conditional workspace '%s'" % options.conditional ) ################################################## ################################################## ################################################## ## check if reference is compplete ################################################## if options.reference: reference = options.reference for track in segments.tracks: if track not in options.reference: raise ValueError("missing track '%s' in reference" % track ) r = options.reference[track] for annotation in annotations.tracks: if annotation not in r: raise ValueError("missing annotation '%s' in annotations for track='%s'" % (annotation, track )) ################################################## ################################################## ################################################## ## compute ################################################## annotator_results = gat.run( segments, annotations, workspace, sampler, counters, workspace_generator = workspace_generator, num_samples = options.num_samples, cache = options.cache, outfiles = outfiles, output_counts_pattern = options.output_counts_pattern, output_samples_pattern = options.output_samples_pattern, sample_files = options.sample_files, conditional = options.conditional, conditional_extension = options.conditional_extension, reference = options.reference, pseudo_count = options.pseudo_count, num_threads = options.num_threads ) return annotator_results def main( argv = None ): """script main. parses command line options in sys.argv, unless *argv* is given. """ if not argv: argv = sys.argv parser = gat.buildParser( usage = globals()["__doc__"] ) ## add common options (-h/--help, ...) and parse command line (options, args) = E.Start( parser, argv = argv, add_output_options = True ) ################################################## description_header, descriptions, description_width = IO.readDescriptions( options ) ################################################## size_pos, size_segment = GatSegmentList.getSegmentSize() E.debug( "sizes: pos=%i segment=%i, max_coord=%i" % (size_pos, size_segment, 2**(8 * size_pos ))) ################################################## # set default counter if not options.counters: options.counters.append( "nucleotide-overlap" ) ################################################## if options.output_tables_pattern != None: if "%s" not in options.output_tables_pattern: raise ValueError( "output_tables_pattern should contain at least one '%s'") if options.output_samples_pattern != None: if "%s" not in options.output_samples_pattern: raise ValueError( "output_samples_pattern should contain at least one '%s'") if options.output_counts_pattern != None: if "%s" not in options.output_counts_pattern: raise ValueError( "output_counts_pattern should contain at least one '%s'") ################################################## # read fold changes that results should be compared with if options.null != "default": if not os.path.exists( options.null ): raise OSError( "file %s not found" % options.null ) E.info( "reading reference results from %s" % options.null ) options.reference = IO.readAnnotatorResults( options.null ) else: options.reference = None if options.input_filename_counts: # use pre-computed counts annotator_results = GatEngine.fromCounts( options.input_filename_counts ) elif options.input_filename_results: # use previous results (re-computes fdr) E.info( "reading gat results from %s" % options.input_filename_results ) annotator_results = IO.readAnnotatorResults( options.input_filename_results ) else: # do full gat analysis annotator_results = fromSegments( options, args ) ################################################## if options.pvalue_method != "empirical": E.info("updating pvalues to %s" % options.pvalue_method ) GatEngine.updatePValues( annotator_results, options.pvalue_method ) ################################################## ## output IO.outputResults( annotator_results, options, GatEngine.AnnotatorResultExtended.headers, description_header, description_width, descriptions ) IO.plotResults( annotator_results, options ) ## write footer and output benchmark information. E.Stop() if __name__ == "__main__": sys.exit( main( sys.argv) )
