annotate shear_wrapper.py @ 6:0158f7356ffd

Add new outputs and params to shear_sv.xml for version 2.3
author Jim Johnson <jj@umn.edu>
date Mon, 21 Oct 2013 08:54:58 -0500
parents a82400332451
children 8fac6c7d32fd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
1 #!/usr/bin/env python
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
2
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
3 import optparse, os, shutil, subprocess, sys, tempfile, os.path
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
4
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
5 def stop_err( msg, code ):
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
6 sys.stderr.write( '%s\n' % msg )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
7 sys.exit(code)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
8
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
9 def __main__():
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
10 #Parse Command Line
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
11 parser = optparse.OptionParser()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
12 parser.add_option( '-j', '--jar_path', dest='jar_path', help='Path to SHEAR.jar' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
13 parser.add_option( '-C', '--command', dest='command', help='SHEAR command to run' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
14 parser.add_option( '-p', '--prefix', dest='prefix', help='Prefix for all generated files. Required.' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
15 parser.add_option( '-b', '--bam', dest='bam', help='BAM alignment file containing the input sequences to the assembly.' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
16 parser.add_option( '-B', '--bai', dest='bai', help='BAM index file (.bai)' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
17 parser.add_option( '-f', '--fasta_reference', dest='fasta', help='The reference sequence fasta file' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
18 parser.add_option( '-F', '--fasta_index', dest='fai', help='The .fai index file for the reference sequence fasta' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
19 parser.add_option( '-t', '--twobit', dest='twobit', help='The .2bit encoding of the reference sequence fasta generated by faToTwoBit' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
20 parser.add_option( '-i', '--bwa_index', dest='bwa_index', help='The bwa index of the reference sequence' )
6
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
21 parser.add_option( '-R', '--report', dest='report', help='The SHEAR output report' )
4
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
22 parser.add_option( '-s', '--sdi', dest='sdi', help='The SHEAR sdi input from the SHEAR sv command' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
23 parser.add_option( '-o', '--output', dest='output', help='The SHEAR output assembly fasta file' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
24 parser.add_option( '-D', '--svidx_dir', dest='svidx_dir', help='The SHEAR output assembly fasta file' )
6
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
25 parser.add_option( '-S', '--sv-only', dest='sv_only', action="store_true", help='SV Only prediction mode.' )
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
26 parser.add_option( '-r', '--region', dest='region', help='Region of the input alignment to analyze' )
4
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
27 (options, args) = parser.parse_args()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
28
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
29 def make_ref(src, dest, copy=False):
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
30 if copy:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
31 shutil.copy( src, dest )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
32 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
33 os.symlink( src, dest )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
34
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
35 # output version # of tool
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
36 #try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
37 #except:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
38 # sys.stdout.write( 'Could not determine BWA version\n' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
39 if not options.jar_path:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
40 stop_err('path to SHEAR.jar is required',1)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
41 if options.command:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
42 command = options.command
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
43 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
44 stop_err('SHEAR command is required',1)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
45 args = [ 'java','-jar' ]
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
46 args.append( options.jar_path )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
47 args.append( command )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
48 # Check required params for command
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
49 buffsize = 1048576
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
50 copy_ref = False
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
51 prefix = options.prefix if options.prefix and len(options.prefix) > 0 else 'ref'
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
52 if command in ['sv']:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
53 args.append( '-p' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
54 args.append( prefix )
6
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
55 if options.sv_only:
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
56 args.append('--sv-only')
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
57 if options.region:
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
58 args.append('-r')
0158f7356ffd Add new outputs and params to shear_sv.xml for version 2.3
Jim Johnson <jj@umn.edu>
parents: 4
diff changeset
59 args.append(options.region)
4
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
60 if options.svidx_dir and command in ['sv']:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
61 if not os.path.isdir(options.svidx_dir):
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
62 os.makedirs(options.svidx_dir)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
63 ref_prefix = os.path.join(options.svidx_dir,prefix)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
64 copy_ref = True
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
65 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
66 ref_prefix = os.path.join(os.getcwd(),prefix)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
67
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
68 if command in ['sv','assemble']:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
69 ref_fasta = ref_prefix + '.fa'
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
70 ref_index = ref_fasta + '.fai'
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
71 if options.fasta:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
72 make_ref( options.fasta, ref_fasta, copy=copy_ref )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
73 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
74 stop_err('Reference fasta file is required',3)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
75 # if reference fasta index is not supllied, generate it
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
76 if options.fai:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
77 make_ref( options.fai, ref_index, copy=copy_ref )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
78 elif os.path.exists(options.fasta + '.fai'):
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
79 make_ref( options.fasta + '.fai', ref_index, copy=copy_ref )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
80 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
81 # generate fasta fai index
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
82 cmd1 = 'samtools faidx %s' % (ref_fasta )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
83 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
84 tmp_name = tempfile.NamedTemporaryFile(prefix='fai_', suffix='.err').name
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
85 tmp_stderr = open( tmp_name, 'wb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
86 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
87 returncode = proc.wait()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
88 tmp_stderr.close()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
89 if returncode != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
90 stderr = ''
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
91 # get stderr, allowing for case where it's very large
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
92 tmp_stderr = open( tmp, 'rb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
93 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
94 while True:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
95 stderr += tmp_stderr.read( buffsize )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
96 if not stderr or len( stderr ) % buffsize != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
97 break
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
98 except OverflowError:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
99 pass
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
100 raise Exception, stderr
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
101 except Exception, e:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
102 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),5 )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
103 args.append( '-f' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
104 args.append( ref_fasta )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
105
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
106 if command in ['sv']:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
107 if not options.bam:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
108 stop_err('BAM alignment file is required',2)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
109
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
110 # if reference 2bit is not supplied, generate it
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
111 twobit = ref_prefix + '.2bit'
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
112 if options.twobit:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
113 make_ref( options.twobit, twobit, copy=copy_ref )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
114 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
115 # generate 2bit index
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
116 cmd1 = 'faToTwoBit %s %s' % (ref_fasta, twobit )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
117 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
118 tmp_name = tempfile.NamedTemporaryFile(prefix='twobit_', suffix='.err').name
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
119 tmp_stderr = open( tmp_name, 'wb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
120 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
121 returncode = proc.wait()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
122 tmp_stderr.close()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
123 if returncode != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
124 stderr = ''
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
125 # get stderr, allowing for case where it's very large
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
126 tmp_stderr = open( tmp, 'rb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
127 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
128 while True:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
129 stderr += tmp_stderr.read( buffsize )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
130 if not stderr or len( stderr ) % buffsize != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
131 break
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
132 except OverflowError:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
133 pass
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
134 raise Exception, stderr
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
135 except Exception, e:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
136 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),6 )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
137 args.append( '-t' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
138 args.append( twobit )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
139
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
140 # if bwa index is not supplied, generate it
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
141 bwa_index = ref_fasta
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
142 if options.bwa_index:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
143 if copy_ref:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
144 shutil.copytree(os.path.dirname(bwa_index),options.svidx_dir)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
145 bwa_index = options.bwa_index
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
146 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
147 ONE_GB = 2**30
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
148 if os.stat( ref_fasta ).st_size <= ONE_GB: #use 1 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
149 index_algorithm = 'is'
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
150 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
151 index_algorithm = 'bwtsw'
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
152 cmd1 = 'bwa index -a %s %s' % ( index_algorithm, ref_fasta )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
153 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
154 tmp_name = tempfile.NamedTemporaryFile(prefix='bwa_index_', suffix='.err').name
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
155 tmp_stderr = open( tmp_name, 'wb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
156 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
157 returncode = proc.wait()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
158 tmp_stderr.close()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
159 if returncode != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
160 stderr = ''
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
161 # get stderr, allowing for case where it's very large
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
162 tmp_stderr = open( tmp, 'rb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
163 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
164 while True:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
165 stderr += tmp_stderr.read( buffsize )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
166 if not stderr or len( stderr ) % buffsize != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
167 break
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
168 except OverflowError:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
169 pass
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
170 raise Exception, stderr
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
171 bwa_index = ref_fasta
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
172 except Exception, e:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
173 # clean up temp dirs
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
174 stop_err( 'Error creating bwa index. ' + str( e ),7 )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
175 args.append( '-i' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
176 args.append( bwa_index )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
177
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
178 bam_file = os.path.join(os.getcwd(),prefix + '.bam')
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
179 bai_file = bam_file + '.bai'
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
180 if options.bam:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
181 os.symlink( options.bam, bam_file )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
182 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
183 stop_err('BAM alignment file is required',2)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
184 # if bam index is not supplied, generate it
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
185 if options.bai:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
186 os.symlink( options.bai, bai_file )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
187 elif os.path.exists(options.bam + '.bai'):
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
188 os.symlink( options.bam + '.bai', bai_file )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
189 else:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
190 # generate bam index
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
191 cmd1 = 'samtools index %s %s' % (bam_file, bai_file )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
192 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
193 tmp_name = tempfile.NamedTemporaryFile(prefix='bai_', suffix='.err').name
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
194 tmp_stderr = open( tmp_name, 'wb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
195 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
196 returncode = proc.wait()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
197 tmp_stderr.close()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
198 if returncode != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
199 stderr = ''
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
200 # get stderr, allowing for case where it's very large
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
201 tmp_stderr = open( tmp, 'rb' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
202 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
203 while True:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
204 stderr += tmp_stderr.read( buffsize )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
205 if not stderr or len( stderr ) % buffsize != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
206 break
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
207 except OverflowError:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
208 pass
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
209 raise Exception, stderr
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
210 except Exception, e:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
211 stop_err( 'Error indexing BAM alignment file. ' + str( e ),4)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
212 args.append( '-b' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
213 args.append( bam_file )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
214
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
215 if command in ['assemble']:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
216 if not options.sdi or not options.output:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
217 stop_err('SHEAR .sdi file and output file are required for assemble',2)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
218 args.append( '-s' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
219 args.append( options.sdi )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
220 args.append( '-o' )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
221 args.append( options.output )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
222
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
223 # Run SHEAR command
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
224 print >> sys.stdout, "%s" % ' '.join(args)
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
225 try:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
226 proc = subprocess.Popen( args=args, shell=False )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
227 returncode = proc.wait()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
228 if returncode != 0:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
229 stop_err( 'Error running SHEAR ' + command, returncode )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
230 except Exception, e:
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
231 # clean up temp dirs
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
232 stop_err( 'Error running SHEAR %s %s' % (command,str(e)),9 )
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
233
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
234 if __name__=="__main__": __main__()
a82400332451 Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff changeset
235