annotate macs_wrapper.py @ 0:512c6b2dba00 draft

Imported from capsule None
author devteam
date Thu, 23 Jan 2014 12:32:02 -0500
parents
children 5d67bc6f385f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
1 import glob
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
2 import gzip
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
3 import json
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
4 import os
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
5 import os.path
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
6 import shutil
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
7 import subprocess
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
8 import sys
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
9 import tempfile
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
10
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
11 CHUNK_SIZE = 1024
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
12
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
13 def gunzip_cat_glob_path( glob_path, target_filename, delete = False ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
14 out = open( target_filename, 'wb' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
15 for filename in glob.glob( glob_path ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
16 fh = gzip.open( filename, 'rb' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
17 while True:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
18 data = fh.read( CHUNK_SIZE )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
19 if data:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
20 out.write( data )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
21 else:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
22 break
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
23 fh.close()
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
24 if delete:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
25 os.unlink( filename )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
26 out.close()
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
27
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
28 def xls_to_interval( xls_file, interval_file, header = None ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
29 out = open( interval_file, 'wb' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
30 if header:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
31 out.write( '#%s\n' % header )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
32 wrote_header = False
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
33 #From macs readme: Coordinates in XLS is 1-based which is different with BED format.
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
34 for line in open( xls_file ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
35 #keep all existing comment lines
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
36 if line.startswith( '#' ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
37 out.write( line )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
38 elif not wrote_header:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
39 out.write( '#%s' % line )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
40 wrote_header = True
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
41 else:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
42 fields = line.split( '\t' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
43 if len( fields ) > 1:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
44 fields[1] = str( int( fields[1] ) - 1 )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
45 out.write( '\t'.join( fields ) )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
46 out.close()
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
47
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
48 def main():
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
49 options = json.load( open( sys.argv[1] ) )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
50 output_bed = sys.argv[2]
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
51 output_extra_html = sys.argv[3]
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
52 output_extra_path = sys.argv[4]
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
53
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
54 experiment_name = '_'.join( options['experiment_name'].split() ) #save experiment name here, it will be used by macs for filenames (gzip of wig files will fail with spaces - macs doesn't properly escape them)..need to replace all whitespace, split makes this easier
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
55 cmdline = "macs -t %s" % ",".join( options['input_chipseq'] )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
56 if options['input_control']:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
57 cmdline = "%s -c %s" % ( cmdline, ",".join( options['input_control'] ) )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
58 cmdline = "%s --format='%s' --name='%s' --gsize='%s' --tsize='%s' --bw='%s' --pvalue='%s' --mfold='%s' %s --lambdaset='%s' %s" % ( cmdline, options['format'], experiment_name, options['gsize'], options['tsize'], options['bw'], options['pvalue'], options['mfold'], options['nolambda'], options['lambdaset'], options['futurefdr'] )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
59 if 'wig' in options:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
60 wigextend = int( options['wig']['wigextend'] )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
61 if wigextend >= 0:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
62 wigextend = "--wigextend='%s'" % wigextend
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
63 else:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
64 wigextend = ''
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
65 cmdline = "%s --wig %s --space='%s'" % ( cmdline, wigextend, options['wig']['space'] )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
66 if 'nomodel' in options:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
67 cmdline = "%s --nomodel --shiftsize='%s'" % ( cmdline, options['nomodel'] )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
68 if 'diag' in options:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
69 cmdline = "%s --diag --fe-min='%s' --fe-max='%s' --fe-step='%s'" % ( cmdline, options['diag']['fe-min'], options['diag']['fe-max'], options['diag']['fe-step'] )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
70
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
71 tmp_dir = tempfile.mkdtemp() #macs makes very messy output, need to contain it into a temp dir, then provide to user
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
72 stderr_name = tempfile.NamedTemporaryFile().name # redirect stderr here, macs provides lots of info via stderr, make it into a report
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
73 proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=open( stderr_name, 'wb' ) )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
74 proc.wait()
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
75 #We don't want to set tool run to error state if only warnings or info, e.g. mfold could be decreased to improve model, but let user view macs log
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
76 #Do not terminate if error code, allow dataset (e.g. log) creation and cleanup
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
77 if proc.returncode:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
78 stderr_f = open( stderr_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
79 while True:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
80 chunk = stderr_f.read( CHUNK_SIZE )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
81 if not chunk:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
82 stderr_f.close()
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
83 break
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
84 sys.stderr.write( chunk )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
85
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
86 #run R to create pdf from model script
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
87 if os.path.exists( os.path.join( tmp_dir, "%s_model.r" % experiment_name ) ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
88 cmdline = 'R --vanilla --slave < "%s_model.r" > "%s_model.r.log"' % ( experiment_name, experiment_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
89 proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
90 proc.wait()
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
91
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
92
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
93 #move bed out to proper output file
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
94 created_bed_name = os.path.join( tmp_dir, "%s_peaks.bed" % experiment_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
95 if os.path.exists( created_bed_name ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
96 shutil.move( created_bed_name, output_bed )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
97
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
98 #parse xls files to interval files as needed
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
99 if options['xls_to_interval']:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
100 create_peak_xls_file = os.path.join( tmp_dir, '%s_peaks.xls' % experiment_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
101 if os.path.exists( create_peak_xls_file ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
102 xls_to_interval( create_peak_xls_file, options['xls_to_interval']['peaks_file'], header = 'peaks file' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
103 create_peak_xls_file = os.path.join( tmp_dir, '%s_negative_peaks.xls' % experiment_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
104 if os.path.exists( create_peak_xls_file ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
105 xls_to_interval( create_peak_xls_file, options['xls_to_interval']['negative_peaks_file'], header = 'negative peaks file' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
106
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
107 #merge and move wig files as needed, delete gz'd files and remove emptied dirs
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
108 if 'wig' in options:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
109 wig_base_dir = os.path.join( tmp_dir, "%s_MACS_wiggle" % experiment_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
110 if os.path.exists( wig_base_dir ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
111 #treatment
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
112 treatment_dir = os.path.join( wig_base_dir, "treat" )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
113 if os.path.exists( treatment_dir ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
114 gunzip_cat_glob_path( os.path.join( treatment_dir, "*.wig.gz" ), options['wig']['output_treatment_file'], delete = True )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
115 os.rmdir( treatment_dir )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
116 #control
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
117 if options['input_control']:
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
118 control_dir = os.path.join( wig_base_dir, "control" )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
119 if os.path.exists( control_dir ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
120 gunzip_cat_glob_path( os.path.join( control_dir, "*.wig.gz" ), options['wig']['output_control_file'], delete = True )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
121 os.rmdir( control_dir )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
122 os.rmdir( wig_base_dir )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
123
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
124 #move all remaining files to extra files path of html file output to allow user download
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
125 out_html = open( output_extra_html, 'wb' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
126 out_html.write( '<html><head><title>Additional output created by MACS (%s)</title></head><body><h3>Additional Files:</h3><p><ul>\n' % experiment_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
127 os.mkdir( output_extra_path )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
128 for filename in sorted( os.listdir( tmp_dir ) ):
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
129 shutil.move( os.path.join( tmp_dir, filename ), os.path.join( output_extra_path, filename ) )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
130 out_html.write( '<li><a href="%s">%s</a></li>\n' % ( filename, filename ) )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
131 out_html.write( '</ul></p>\n' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
132 out_html.write( '<h3>Messages from MACS:</h3>\n<p><pre>%s</pre></p>\n' % open( stderr_name, 'rb' ).read() )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
133 out_html.write( '</body></html>\n' )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
134 out_html.close()
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
135
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
136 os.unlink( stderr_name )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
137 os.rmdir( tmp_dir )
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
138
512c6b2dba00 Imported from capsule None
devteam
parents:
diff changeset
139 if __name__ == "__main__": main()