Mercurial > repos > fubar > genomic_association_tester
comparison rlGAT/gat-run.py @ 11:53487f21c0d5 draft
Uploaded
| author | fubar |
|---|---|
| date | Thu, 29 Aug 2013 01:57:54 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 10:f04dfb37d1bb | 11:53487f21c0d5 |
|---|---|
| 1 ################################################################################ | |
| 2 # | |
| 3 # MRC FGU Computational Genomics Group | |
| 4 # | |
| 5 # $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $ | |
| 6 # | |
| 7 # Copyright (C) 2009 Andreas Heger | |
| 8 # | |
| 9 # This program is free software; you can redistribute it and/or | |
| 10 # modify it under the terms of the GNU General Public License | |
| 11 # as published by the Free Software Foundation; either version 2 | |
| 12 # of the License, or (at your option) any later version. | |
| 13 # | |
| 14 # This program is distributed in the hope that it will be useful, | |
| 15 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 17 # GNU General Public License for more details. | |
| 18 # | |
| 19 # You should have received a copy of the GNU General Public License | |
| 20 # along with this program; if not, write to the Free Software | |
| 21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
| 22 ################################################################################# | |
| 23 ''' | |
| 24 gat-run - run the genomic annotation tool | |
| 25 ============================================= | |
| 26 | |
| 27 :Author: Andreas Heger | |
| 28 :Release: $Id$ | |
| 29 :Date: |today| | |
| 30 :Tags: Python | |
| 31 | |
| 32 Purpose | |
| 33 ------- | |
| 34 | |
| 35 This script compares one or more genomic segments of interest | |
| 36 against one more other genomic annotations. | |
| 37 | |
| 38 Usage | |
| 39 ----- | |
| 40 | |
| 41 Example:: | |
| 42 | |
| 43 python gat-run.py | |
| 44 --segment-file=segments.bed.gz | |
| 45 --workspace-file=workspace.bed.gz | |
| 46 --annotation-file=annotations_architecture.bed.gz | |
| 47 | |
| 48 Type:: | |
| 49 | |
| 50 python gat-run.py --help | |
| 51 | |
| 52 for command line help. | |
| 53 | |
| 54 Documentation | |
| 55 ------------- | |
| 56 | |
| 57 Code | |
| 58 ---- | |
| 59 | |
| 60 ''' | |
| 61 | |
| 62 import os, sys, re, optparse, collections, types, glob, time | |
| 63 import numpy | |
| 64 | |
| 65 import gat | |
| 66 import gat.Experiment as E | |
| 67 import gat.IOTools as IOTools | |
| 68 import gat.IO as IO | |
| 69 import gat.Stats as Stats | |
| 70 import GatSegmentList | |
| 71 import GatEngine | |
| 72 | |
| 73 def fromSegments( options, args ): | |
| 74 '''run analysis from segment files. | |
| 75 | |
| 76 This is the most common use case. | |
| 77 ''' | |
| 78 | |
| 79 tstart = time.time() | |
| 80 | |
| 81 ################################################## | |
| 82 ################################################## | |
| 83 ################################################## | |
| 84 ## build segments | |
| 85 segments, annotations, workspaces, isochores = IO.buildSegments( options ) | |
| 86 | |
| 87 E.info( "intervals loaded in %i seconds" % (time.time() - tstart) ) | |
| 88 | |
| 89 ################################################## | |
| 90 ################################################## | |
| 91 ################################################## | |
| 92 ## open various additional output files | |
| 93 ################################################## | |
| 94 outfiles = {} | |
| 95 for section in ("sample", | |
| 96 "segment_metrics", | |
| 97 "sample_metrics", | |
| 98 ): | |
| 99 if section in options.output_stats or \ | |
| 100 "all" in options.output_stats or \ | |
| 101 len( [ x for x in options.output_stats if re.search( x, "section" ) ] ) > 0: | |
| 102 outfiles[section] = E.openOutputFile(section) | |
| 103 | |
| 104 if 'sample_metrics' in outfiles: | |
| 105 outfiles['sample_metrics'].write( "track\tsection\tmetric\t%s\n" % "\t".join(Stats.Summary().getHeaders() )) | |
| 106 | |
| 107 # filter segments by workspace | |
| 108 workspace = IO.applyIsochores( segments, | |
| 109 annotations, | |
| 110 workspaces, | |
| 111 options, | |
| 112 isochores, | |
| 113 truncate_segments_to_workspace = options.truncate_segments_to_workspace, | |
| 114 truncate_workspace_to_annotations = options.truncate_workspace_to_annotations, | |
| 115 restrict_workspace = options.restrict_workspace ) | |
| 116 | |
| 117 ################################################## | |
| 118 ################################################## | |
| 119 ################################################## | |
| 120 ## check memory requirements | |
| 121 counts = segments.countsPerTrack() | |
| 122 max_counts = max(counts.values()) | |
| 123 # previous algorithm: memory requirements if all samples are stored | |
| 124 memory = 8 * 2 * options.num_samples * max_counts * len(workspace) | |
| 125 | |
| 126 ################################################## | |
| 127 ################################################## | |
| 128 ################################################## | |
| 129 # initialize sampler | |
| 130 if options.sampler == "annotator": | |
| 131 sampler = GatEngine.SamplerAnnotator( | |
| 132 bucket_size = options.bucket_size, | |
| 133 nbuckets = options.nbuckets ) | |
| 134 elif options.sampler == "shift": | |
| 135 sampler = GatEngine.SamplerShift( | |
| 136 radius = options.shift_expansion, | |
| 137 extension = options.shift_extension ) | |
| 138 elif options.sampler == "segments": | |
| 139 sampler = GatEngine.SamplerSegments() | |
| 140 elif options.sampler == "local-permutation": | |
| 141 sampler = GatEngine.SamplerLocalPermutation() | |
| 142 elif options.sampler == "global-permutation": | |
| 143 sampler = GatEngine.SamplerGlobalPermutation() | |
| 144 elif options.sampler == "brute-force": | |
| 145 sampler = GatEngine.SamplerBruteForce() | |
| 146 elif options.sampler == "uniform": | |
| 147 sampler = GatEngine.SamplerUniform() | |
| 148 | |
| 149 ################################################## | |
| 150 ################################################## | |
| 151 ################################################## | |
| 152 # initialize counter | |
| 153 counters = [] | |
| 154 for counter in options.counters: | |
| 155 if counter == "nucleotide-overlap": | |
| 156 counters.append( GatEngine.CounterNucleotideOverlap() ) | |
| 157 elif counter == "nucleotide-density": | |
| 158 counters.append( GatEngine.CounterNucleotideDensity() ) | |
| 159 elif counter == "segment-overlap": | |
| 160 counters.append( GatEngine.CounterSegmentOverlap() ) | |
| 161 elif counter == "annotations-overlap": | |
| 162 counters.append( GatEngine.CounterAnnotationsOverlap() ) | |
| 163 elif counter == "segment-midoverlap": | |
| 164 counters.append( GatEngine.CounterSegmentMidpointOverlap() ) | |
| 165 elif counter == "annotations-midoverlap": | |
| 166 counters.append( GatEngine.CounterAnnotationsMidpointOverlap() ) | |
| 167 else: | |
| 168 raise ValueError("unknown counter '%s'" % counter ) | |
| 169 | |
| 170 ################################################## | |
| 171 ################################################## | |
| 172 ################################################## | |
| 173 ## initialize workspace generator | |
| 174 if options.conditional == "unconditional": | |
| 175 workspace_generator = GatEngine.UnconditionalWorkspace() | |
| 176 elif options.conditional == "cooccurance": | |
| 177 workspace_generator = GatEngine.ConditionalWorkspaceCooccurance() | |
| 178 elif options.conditional == "annotation-centered": | |
| 179 if options.conditional_extension == options.conditional_expansion == None: | |
| 180 raise ValueError( "please specify either --conditional-expansion or --conditional-extension" ) | |
| 181 workspace_generator = GatEngine.ConditionalWorkspaceAnnotationCentered( options.conditional_extension, | |
| 182 options.conditional_expansion ) | |
| 183 elif options.conditional == "segment-centered": | |
| 184 if options.conditional_extension == options.conditional_expansion == None: | |
| 185 raise ValueError( "please specify either --conditional-expansion or --conditional-extension" ) | |
| 186 | |
| 187 workspace_generator = GatEngine.ConditionalWorkspaceSegmentCentered( options.conditional_extension, | |
| 188 options.conditional_expansion ) | |
| 189 else: | |
| 190 raise ValueError("unknown conditional workspace '%s'" % options.conditional ) | |
| 191 | |
| 192 | |
| 193 ################################################## | |
| 194 ################################################## | |
| 195 ################################################## | |
| 196 ## check if reference is compplete | |
| 197 ################################################## | |
| 198 if options.reference: | |
| 199 reference = options.reference | |
| 200 for track in segments.tracks: | |
| 201 if track not in options.reference: | |
| 202 raise ValueError("missing track '%s' in reference" % track ) | |
| 203 r = options.reference[track] | |
| 204 for annotation in annotations.tracks: | |
| 205 if annotation not in r: | |
| 206 raise ValueError("missing annotation '%s' in annotations for track='%s'" % (annotation, track )) | |
| 207 | |
| 208 | |
| 209 ################################################## | |
| 210 ################################################## | |
| 211 ################################################## | |
| 212 ## compute | |
| 213 ################################################## | |
| 214 annotator_results = gat.run( segments, | |
| 215 annotations, | |
| 216 workspace, | |
| 217 sampler, | |
| 218 counters, | |
| 219 workspace_generator = workspace_generator, | |
| 220 num_samples = options.num_samples, | |
| 221 cache = options.cache, | |
| 222 outfiles = outfiles, | |
| 223 output_counts_pattern = options.output_counts_pattern, | |
| 224 output_samples_pattern = options.output_samples_pattern, | |
| 225 sample_files = options.sample_files, | |
| 226 conditional = options.conditional, | |
| 227 conditional_extension = options.conditional_extension, | |
| 228 reference = options.reference, | |
| 229 pseudo_count = options.pseudo_count, | |
| 230 num_threads = options.num_threads ) | |
| 231 | |
| 232 return annotator_results | |
| 233 | |
| 234 | |
| 235 def main( argv = None ): | |
| 236 """script main. | |
| 237 | |
| 238 parses command line options in sys.argv, unless *argv* is given. | |
| 239 """ | |
| 240 | |
| 241 if not argv: argv = sys.argv | |
| 242 | |
| 243 parser = gat.buildParser( usage = globals()["__doc__"] ) | |
| 244 | |
| 245 ## add common options (-h/--help, ...) and parse command line | |
| 246 (options, args) = E.Start( parser, argv = argv, add_output_options = True ) | |
| 247 | |
| 248 ################################################## | |
| 249 description_header, descriptions, description_width = IO.readDescriptions( options ) | |
| 250 | |
| 251 ################################################## | |
| 252 size_pos, size_segment = GatSegmentList.getSegmentSize() | |
| 253 E.debug( "sizes: pos=%i segment=%i, max_coord=%i" % (size_pos, size_segment, 2**(8 * size_pos ))) | |
| 254 | |
| 255 ################################################## | |
| 256 # set default counter | |
| 257 if not options.counters: | |
| 258 options.counters.append( "nucleotide-overlap" ) | |
| 259 | |
| 260 ################################################## | |
| 261 if options.output_tables_pattern != None: | |
| 262 if "%s" not in options.output_tables_pattern: | |
| 263 raise ValueError( "output_tables_pattern should contain at least one '%s'") | |
| 264 | |
| 265 if options.output_samples_pattern != None: | |
| 266 if "%s" not in options.output_samples_pattern: | |
| 267 raise ValueError( "output_samples_pattern should contain at least one '%s'") | |
| 268 | |
| 269 if options.output_counts_pattern != None: | |
| 270 if "%s" not in options.output_counts_pattern: | |
| 271 raise ValueError( "output_counts_pattern should contain at least one '%s'") | |
| 272 | |
| 273 | |
| 274 ################################################## | |
| 275 # read fold changes that results should be compared with | |
| 276 if options.null != "default": | |
| 277 if not os.path.exists( options.null ): | |
| 278 raise OSError( "file %s not found" % options.null ) | |
| 279 E.info( "reading reference results from %s" % options.null ) | |
| 280 options.reference = IO.readAnnotatorResults( options.null ) | |
| 281 else: | |
| 282 options.reference = None | |
| 283 | |
| 284 if options.input_filename_counts: | |
| 285 # use pre-computed counts | |
| 286 annotator_results = GatEngine.fromCounts( options.input_filename_counts ) | |
| 287 | |
| 288 elif options.input_filename_results: | |
| 289 # use previous results (re-computes fdr) | |
| 290 E.info( "reading gat results from %s" % options.input_filename_results ) | |
| 291 annotator_results = IO.readAnnotatorResults( options.input_filename_results ) | |
| 292 | |
| 293 else: | |
| 294 # do full gat analysis | |
| 295 annotator_results = fromSegments( options, args ) | |
| 296 | |
| 297 ################################################## | |
| 298 if options.pvalue_method != "empirical": | |
| 299 E.info("updating pvalues to %s" % options.pvalue_method ) | |
| 300 GatEngine.updatePValues( annotator_results, options.pvalue_method ) | |
| 301 | |
| 302 ################################################## | |
| 303 ## output | |
| 304 IO.outputResults( annotator_results, | |
| 305 options, | |
| 306 GatEngine.AnnotatorResultExtended.headers, | |
| 307 description_header, | |
| 308 description_width, | |
| 309 descriptions ) | |
| 310 | |
| 311 IO.plotResults( annotator_results, options ) | |
| 312 | |
| 313 ## write footer and output benchmark information. | |
| 314 E.Stop() | |
| 315 | |
| 316 if __name__ == "__main__": | |
| 317 sys.exit( main( sys.argv) ) |
