annotate sicer_wrapper.py @ 0:2862c48684da draft

Imported from capsule None
author devteam
date Thu, 23 Jan 2014 12:31:57 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
2 #Dan Blankenberg
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
3
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
4 """
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
5 A wrapper script for running SICER (spatial clustering approach for the identification of ChIP-enriched regions) region caller.
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
6 """
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
7
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
8 import sys, optparse, os, tempfile, subprocess, shutil
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
9
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
10 CHUNK_SIZE = 2**20 #1mb
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
11
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
12 #HACK! FIXME: allow using all specified builds, would currently require hacking SICER's "GenomeData.py" on the fly.
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
13 VALID_BUILDS = [ 'mm8', 'mm9', 'hg18', 'hg19', 'dm2', 'dm3', 'sacCer1', 'pombe', 'rn4', 'tair8' ]
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
14
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
15 def cleanup_before_exit( tmp_dir ):
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
16 if tmp_dir and os.path.exists( tmp_dir ):
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
17 shutil.rmtree( tmp_dir )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
18
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
19
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
20 def open_file_from_option( filename, mode = 'rb' ):
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
21 if filename:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
22 return open( filename, mode = mode )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
23 return None
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
24
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
25 def add_one_to_file_column( filename, column, split_char = "\t", startswith_skip = None ):
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
26 tmp_out = tempfile.TemporaryFile( mode='w+b' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
27 tmp_in = open( filename )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
28 for line in tmp_in:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
29 if startswith_skip and line.startswith( startswith_skip ):
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
30 tmp_out.write( line )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
31 else:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
32 fields = line.rstrip( '\n\r' ).split( split_char )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
33 if len( fields ) <= column:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
34 tmp_out.write( line )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
35 else:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
36 fields[ column ] = str( int( fields[ column ] ) + 1 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
37 tmp_out.write( "%s\n" % ( split_char.join( fields ) ) )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
38 tmp_in.close()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
39 tmp_out.seek( 0 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
40 tmp_in = open( filename, 'wb' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
41 while True:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
42 chunk = tmp_out.read( CHUNK_SIZE )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
43 if chunk:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
44 tmp_in.write( chunk )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
45 else:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
46 break
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
47 tmp_in.close()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
48 tmp_out.close()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
49
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
50 def __main__():
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
51 #Parse Command Line
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
52 parser = optparse.OptionParser()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
53 #stdout/err
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
54 parser.add_option( '', '--stdout', dest='stdout', action='store', type="string", default=None, help='If specified, the output of stdout will be written to this file.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
55 parser.add_option( '', '--stderr', dest='stderr', action='store', type="string", default=None, help='If specified, the output of stderr will be written to this file.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
56 parser.add_option( '', '--fix_off_by_one_errors', dest='fix_off_by_one_errors', action='store_true', default=False, help='If specified, fix off-by-one errors in output files' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
57 #inputs
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
58 parser.add_option( '-b', '--bed_file', dest='bed_file', action='store', type="string", default=None, help='Input ChIP BED file.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
59 parser.add_option( '-c', '--control_file', dest='control_file', action='store', type="string", default=None, help='Input control BED file.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
60 parser.add_option( '-d', '--dbkey', dest='dbkey', action='store', type="string", default=None, help='Input dbkey.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
61 parser.add_option( '-r', '--redundancy_threshold', dest='redundancy_threshold', action='store', type="int", default=1, help='Redundancy Threshold: The number of copies of identical reads allowed in a library.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
62 parser.add_option( '-w', '--window_size', dest='window_size', action='store', type="int", default=200, help='Window size: resolution of SICER algorithm. For histone modifications, one can use 200 bp' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
63 parser.add_option( '-f', '--fragment_size', dest='fragment_size', action='store', type="int", default=150, help='Fragment size: is for determination of the amount of shift from the beginning of a read to the center of the DNA fragment represented by the read. FRAGMENT_SIZE=150 means the shift is 75.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
64 parser.add_option( '-e', '--effective_genome_fraction', dest='effective_genome_fraction', action='store', type="float", default=0.74, help='Effective genome fraction: Effective Genome as fraction of the genome size. It depends on read length.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
65 parser.add_option( '-g', '--gap_size', dest='gap_size', action='store', type="int", default=600, help='Gap size: needs to be multiples of window size. Namely if the window size is 200, the gap size should be 0, 200, 400, 600, ... .' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
66 parser.add_option( '-o', '--error_cut_off', dest='error_cut_off', action='store', type="string", default="0.1", help='Error Cut off: FDR or E-value' ) #read as string to construct names properly
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
67 #outputs
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
68 parser.add_option( '', '--redundancy_removed_test_bed_output_file', dest='redundancy_removed_test_bed_output_file', action='store', type="string", default=None, help='test-1-removed.bed: redundancy_removed test bed file' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
69 parser.add_option( '', '--redundancy_removed_control_bed_output_file', dest='redundancy_removed_control_bed_output_file', action='store', type="string", default=None, help='control-1-removed.bed: redundancy_removed control bed file' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
70 parser.add_option( '', '--summary_graph_output_file', dest='summary_graph_output_file', action='store', type="string", default=None, help='test-W200.graph: summary graph file for test-1-removed.bed with window size 200, in bedGraph format.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
71 parser.add_option( '', '--test_normalized_wig_output_file', dest='test_normalized_wig_output_file', action='store', type="string", default=None, help='test-W200-normalized.wig: the above file normalized by library size per million and converted into wig format. This file can be uploaded to the UCSC genome browser' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
72 parser.add_option( '', '--score_island_output_file', dest='score_island_output_file', action='store', type="string", default=None, help='test-W200-G600.scoreisland: an intermediate file for debugging usage.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
73 parser.add_option( '', '--islands_summary_output_file', dest='islands_summary_output_file', action='store', type="string", default=None, help='test-W200-G600-islands-summary: summary of all candidate islands with their statistical significance.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
74 parser.add_option( '', '--significant_islands_summary_output_file', dest='significant_islands_summary_output_file', action='store', type="string", default=None, help='test-W200-G600-islands-summary-FDR.01: summary file of significant islands with requirement of FDR=0.01.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
75 parser.add_option( '', '--significant_islands_output_file', dest='significant_islands_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-island.bed: delineation of significant islands in "chrom start end read-count-from-redundancy_removed-test.bed" format' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
76 parser.add_option( '', '--island_filtered_output_file', dest='island_filtered_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-islandfiltered.bed: library of raw redundancy_removed reads on significant islands.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
77 parser.add_option( '', '--island_filtered_normalized_wig_output_file', dest='island_filtered_normalized_wig_output_file', action='store', type="string", default=None, help='test-W200-G600-FDR.01-islandfiltered-normalized.wig: wig file for the island-filtered redundancy_removed reads.' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
78 (options, args) = parser.parse_args()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
79
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
80 #check if valid build
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
81 assert options.dbkey in VALID_BUILDS, ValueError( "The specified build ('%s') is not available for this tool." % options.dbkey )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
82
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
83 #everything will occur in this temp directory
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
84 tmp_dir = tempfile.mkdtemp()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
85
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
86 #link input files into tmp_dir and build command line
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
87 bed_base_filename = 'input_bed_file'
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
88 bed_filename = '%s.bed' % bed_base_filename
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
89 os.symlink( options.bed_file, os.path.join( tmp_dir, bed_filename ) )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
90 if options.control_file is not None:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
91 cmd = "SICER.sh"
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
92 else:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
93 cmd = "SICER-rb.sh"
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
94 cmd = '%s "%s" "%s"' % ( cmd, tmp_dir, bed_filename )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
95 if options.control_file is not None:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
96 control_base_filename = 'input_control_file'
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
97 control_filename = '%s.bed' % control_base_filename
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
98 os.symlink( options.control_file, os.path.join( tmp_dir, control_filename ) )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
99 cmd = '%s "%s"' % ( cmd, control_filename )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
100 cmd = '%s "%s" "%s" "%i" "%i" "%i" "%f" "%i" "%s"' % ( cmd, tmp_dir, options.dbkey, options.redundancy_threshold, options.window_size, options.fragment_size, options.effective_genome_fraction, options.gap_size, options.error_cut_off )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
101
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
102 #set up stdout and stderr output options
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
103 stdout = open_file_from_option( options.stdout, mode = 'wb' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
104 stderr = open_file_from_option( options.stderr, mode = 'wb' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
105 #if no stderr file is specified, we'll use our own
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
106 if stderr is None:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
107 stderr = tempfile.NamedTemporaryFile( dir=tmp_dir )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
108 stderr.close()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
109 stderr = open( stderr.name, 'w+b' )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
110
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
111 proc = subprocess.Popen( args=cmd, stdout=stdout, stderr=stderr, shell=True, cwd=tmp_dir )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
112 return_code = proc.wait()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
113
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
114 if return_code:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
115 stderr_target = sys.stderr
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
116 else:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
117 stderr_target = stdout #sys.stdout
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
118 stderr_target.write( "\nAdditionally, these warnings were reported:\n" )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
119 stderr.flush()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
120 stderr.seek(0)
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
121 while True:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
122 chunk = stderr.read( CHUNK_SIZE )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
123 if chunk:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
124 stderr_target.write( chunk )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
125 else:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
126 break
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
127 stderr.close()
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
128
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
129 try:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
130 #move files to where they belong
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
131 shutil.move( os.path.join( tmp_dir,'%s-%i-removed.bed' % ( bed_base_filename, options.redundancy_threshold ) ), options.redundancy_removed_test_bed_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
132 shutil.move( os.path.join( tmp_dir,'%s-W%i.graph' % ( bed_base_filename, options.window_size ) ), options.summary_graph_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
133 if options.fix_off_by_one_errors: add_one_to_file_column( options.summary_graph_output_file, 2 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
134 shutil.move( os.path.join( tmp_dir,'%s-W%i-normalized.wig' % ( bed_base_filename, options.window_size ) ), options.test_normalized_wig_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
135 if options.control_file is not None:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
136 shutil.move( os.path.join( tmp_dir,'%s-%i-removed.bed' % ( control_base_filename, options.redundancy_threshold ) ), options.redundancy_removed_control_bed_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
137 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i.scoreisland' % ( bed_base_filename, options.window_size, options.gap_size ) ), options.score_island_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
138 if options.fix_off_by_one_errors: add_one_to_file_column( options.score_island_output_file, 2 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
139 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-islands-summary' % ( bed_base_filename, options.window_size, options.gap_size ) ), options.islands_summary_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
140 if options.fix_off_by_one_errors: add_one_to_file_column( options.islands_summary_output_file, 2 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
141 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-islands-summary-FDR%s' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.significant_islands_summary_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
142 if options.fix_off_by_one_errors: add_one_to_file_column( options.significant_islands_summary_output_file, 2 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
143 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-FDR%s-island.bed' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.significant_islands_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
144 if options.fix_off_by_one_errors: add_one_to_file_column( options.significant_islands_output_file, 2 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
145 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-FDR%s-islandfiltered.bed' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
146 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-FDR%s-islandfiltered-normalized.wig' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_normalized_wig_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
147 else:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
148 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-E%s.scoreisland' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.score_island_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
149 if options.fix_off_by_one_errors: add_one_to_file_column( options.score_island_output_file, 2 )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
150 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-E%s-islandfiltered.bed' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
151 shutil.move( os.path.join( tmp_dir,'%s-W%i-G%i-E%s-islandfiltered-normalized.wig' % ( bed_base_filename, options.window_size, options.gap_size, options.error_cut_off ) ), options.island_filtered_normalized_wig_output_file )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
152 except Exception, e:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
153 raise e
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
154 finally:
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
155 cleanup_before_exit( tmp_dir )
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
156
2862c48684da Imported from capsule None
devteam
parents:
diff changeset
157 if __name__=="__main__": __main__()