Mercurial > repos > jjohnson > shear
annotate shear_wrapper.py @ 4:a82400332451
Use shear_wrapper.py to generate reference indexes when needed
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 16 Jul 2013 12:38:48 -0500 |
parents | |
children | 0158f7356ffd |
rev | line source |
---|---|
4
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
1 #!/usr/bin/env python |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
2 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
3 import optparse, os, shutil, subprocess, sys, tempfile, os.path |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
4 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
5 def stop_err( msg, code ): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
6 sys.stderr.write( '%s\n' % msg ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
7 sys.exit(code) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
8 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
9 def __main__(): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
10 #Parse Command Line |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
11 parser = optparse.OptionParser() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
12 parser.add_option( '-j', '--jar_path', dest='jar_path', help='Path to SHEAR.jar' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
13 parser.add_option( '-C', '--command', dest='command', help='SHEAR command to run' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
14 parser.add_option( '-p', '--prefix', dest='prefix', help='Prefix for all generated files. Required.' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
15 parser.add_option( '-b', '--bam', dest='bam', help='BAM alignment file containing the input sequences to the assembly.' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
16 parser.add_option( '-B', '--bai', dest='bai', help='BAM index file (.bai)' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
17 parser.add_option( '-f', '--fasta_reference', dest='fasta', help='The reference sequence fasta file' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
18 parser.add_option( '-F', '--fasta_index', dest='fai', help='The .fai index file for the reference sequence fasta' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
19 parser.add_option( '-t', '--twobit', dest='twobit', help='The .2bit encoding of the reference sequence fasta generated by faToTwoBit' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
20 parser.add_option( '-i', '--bwa_index', dest='bwa_index', help='The bwa index of the reference sequence' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
21 parser.add_option( '-r', '--report', dest='report', help='The SHEAR output report' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
22 parser.add_option( '-s', '--sdi', dest='sdi', help='The SHEAR sdi input from the SHEAR sv command' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
23 parser.add_option( '-o', '--output', dest='output', help='The SHEAR output assembly fasta file' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
24 parser.add_option( '-D', '--svidx_dir', dest='svidx_dir', help='The SHEAR output assembly fasta file' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
25 (options, args) = parser.parse_args() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
26 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
27 def make_ref(src, dest, copy=False): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
28 if copy: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
29 shutil.copy( src, dest ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
30 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
31 os.symlink( src, dest ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
32 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
33 # output version # of tool |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
34 #try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
35 #except: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
36 # sys.stdout.write( 'Could not determine BWA version\n' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
37 if not options.jar_path: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
38 stop_err('path to SHEAR.jar is required',1) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
39 if options.command: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
40 command = options.command |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
41 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
42 stop_err('SHEAR command is required',1) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
43 args = [ 'java','-jar' ] |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
44 args.append( options.jar_path ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
45 args.append( command ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
46 # Check required params for command |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
47 buffsize = 1048576 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
48 copy_ref = False |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
49 prefix = options.prefix if options.prefix and len(options.prefix) > 0 else 'ref' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
50 if command in ['sv']: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
51 args.append( '-p' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
52 args.append( prefix ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
53 if options.svidx_dir and command in ['sv']: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
54 if not os.path.isdir(options.svidx_dir): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
55 os.makedirs(options.svidx_dir) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
56 ref_prefix = os.path.join(options.svidx_dir,prefix) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
57 copy_ref = True |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
58 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
59 ref_prefix = os.path.join(os.getcwd(),prefix) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
60 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
61 if command in ['sv','assemble']: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
62 ref_fasta = ref_prefix + '.fa' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
63 ref_index = ref_fasta + '.fai' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
64 if options.fasta: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
65 make_ref( options.fasta, ref_fasta, copy=copy_ref ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
66 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
67 stop_err('Reference fasta file is required',3) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
68 # if reference fasta index is not supllied, generate it |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
69 if options.fai: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
70 make_ref( options.fai, ref_index, copy=copy_ref ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
71 elif os.path.exists(options.fasta + '.fai'): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
72 make_ref( options.fasta + '.fai', ref_index, copy=copy_ref ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
73 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
74 # generate fasta fai index |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
75 cmd1 = 'samtools faidx %s' % (ref_fasta ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
76 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
77 tmp_name = tempfile.NamedTemporaryFile(prefix='fai_', suffix='.err').name |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
78 tmp_stderr = open( tmp_name, 'wb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
79 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
80 returncode = proc.wait() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
81 tmp_stderr.close() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
82 if returncode != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
83 stderr = '' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
84 # get stderr, allowing for case where it's very large |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
85 tmp_stderr = open( tmp, 'rb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
86 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
87 while True: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
88 stderr += tmp_stderr.read( buffsize ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
89 if not stderr or len( stderr ) % buffsize != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
90 break |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
91 except OverflowError: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
92 pass |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
93 raise Exception, stderr |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
94 except Exception, e: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
95 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),5 ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
96 args.append( '-f' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
97 args.append( ref_fasta ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
98 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
99 if command in ['sv']: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
100 if not options.bam: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
101 stop_err('BAM alignment file is required',2) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
102 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
103 # if reference 2bit is not supplied, generate it |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
104 twobit = ref_prefix + '.2bit' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
105 if options.twobit: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
106 make_ref( options.twobit, twobit, copy=copy_ref ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
107 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
108 # generate 2bit index |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
109 cmd1 = 'faToTwoBit %s %s' % (ref_fasta, twobit ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
110 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
111 tmp_name = tempfile.NamedTemporaryFile(prefix='twobit_', suffix='.err').name |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
112 tmp_stderr = open( tmp_name, 'wb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
113 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
114 returncode = proc.wait() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
115 tmp_stderr.close() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
116 if returncode != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
117 stderr = '' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
118 # get stderr, allowing for case where it's very large |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
119 tmp_stderr = open( tmp, 'rb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
120 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
121 while True: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
122 stderr += tmp_stderr.read( buffsize ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
123 if not stderr or len( stderr ) % buffsize != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
124 break |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
125 except OverflowError: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
126 pass |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
127 raise Exception, stderr |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
128 except Exception, e: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
129 stop_err( 'Error indexing reference sequence fasta file. ' + str( e ),6 ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
130 args.append( '-t' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
131 args.append( twobit ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
132 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
133 # if bwa index is not supplied, generate it |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
134 bwa_index = ref_fasta |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
135 if options.bwa_index: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
136 if copy_ref: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
137 shutil.copytree(os.path.dirname(bwa_index),options.svidx_dir) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
138 bwa_index = options.bwa_index |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
139 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
140 ONE_GB = 2**30 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
141 if os.stat( ref_fasta ).st_size <= ONE_GB: #use 1 GB as cut off for memory vs. max of 2gb database size; this is somewhat arbitrary |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
142 index_algorithm = 'is' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
143 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
144 index_algorithm = 'bwtsw' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
145 cmd1 = 'bwa index -a %s %s' % ( index_algorithm, ref_fasta ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
146 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
147 tmp_name = tempfile.NamedTemporaryFile(prefix='bwa_index_', suffix='.err').name |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
148 tmp_stderr = open( tmp_name, 'wb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
149 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
150 returncode = proc.wait() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
151 tmp_stderr.close() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
152 if returncode != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
153 stderr = '' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
154 # get stderr, allowing for case where it's very large |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
155 tmp_stderr = open( tmp, 'rb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
156 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
157 while True: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
158 stderr += tmp_stderr.read( buffsize ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
159 if not stderr or len( stderr ) % buffsize != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
160 break |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
161 except OverflowError: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
162 pass |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
163 raise Exception, stderr |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
164 bwa_index = ref_fasta |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
165 except Exception, e: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
166 # clean up temp dirs |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
167 stop_err( 'Error creating bwa index. ' + str( e ),7 ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
168 args.append( '-i' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
169 args.append( bwa_index ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
170 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
171 bam_file = os.path.join(os.getcwd(),prefix + '.bam') |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
172 bai_file = bam_file + '.bai' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
173 if options.bam: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
174 os.symlink( options.bam, bam_file ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
175 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
176 stop_err('BAM alignment file is required',2) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
177 # if bam index is not supplied, generate it |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
178 if options.bai: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
179 os.symlink( options.bai, bai_file ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
180 elif os.path.exists(options.bam + '.bai'): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
181 os.symlink( options.bam + '.bai', bai_file ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
182 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
183 # generate bam index |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
184 cmd1 = 'samtools index %s %s' % (bam_file, bai_file ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
185 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
186 tmp_name = tempfile.NamedTemporaryFile(prefix='bai_', suffix='.err').name |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
187 tmp_stderr = open( tmp_name, 'wb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
188 proc = subprocess.Popen( args=cmd1, shell=True, stderr=tmp_stderr.fileno() ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
189 returncode = proc.wait() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
190 tmp_stderr.close() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
191 if returncode != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
192 stderr = '' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
193 # get stderr, allowing for case where it's very large |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
194 tmp_stderr = open( tmp, 'rb' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
195 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
196 while True: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
197 stderr += tmp_stderr.read( buffsize ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
198 if not stderr or len( stderr ) % buffsize != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
199 break |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
200 except OverflowError: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
201 pass |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
202 raise Exception, stderr |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
203 except Exception, e: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
204 stop_err( 'Error indexing BAM alignment file. ' + str( e ),4) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
205 args.append( '-b' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
206 args.append( bam_file ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
207 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
208 if command in ['assemble']: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
209 if not options.sdi or not options.output: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
210 stop_err('SHEAR .sdi file and output file are required for assemble',2) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
211 args.append( '-s' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
212 args.append( options.sdi ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
213 args.append( '-o' ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
214 args.append( options.output ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
215 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
216 # Run SHEAR command |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
217 print >> sys.stdout, "%s" % ' '.join(args) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
218 try: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
219 proc = subprocess.Popen( args=args, shell=False ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
220 returncode = proc.wait() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
221 if returncode != 0: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
222 stop_err( 'Error running SHEAR ' + command, returncode ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
223 if command in ['sv']: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
224 report_path = os.path.join(os.getcwd(),prefix + '.report') |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
225 if os.path.exists(report_path): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
226 if options.report: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
227 shutil.copy(report_path,options.report) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
228 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
229 raise Exception, 'no report file' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
230 sdi_path = os.path.join(os.getcwd(),prefix + '.sdi') |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
231 if os.path.exists(sdi_path): |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
232 if options.sdi: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
233 shutil.copy(sdi_path,options.sdi) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
234 else: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
235 raise Exception, 'no sdi file' |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
236 except Exception, e: |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
237 # clean up temp dirs |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
238 stop_err( 'Error running SHEAR %s %s' % (command,str(e)),9 ) |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
239 |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
240 if __name__=="__main__": __main__() |
a82400332451
Use shear_wrapper.py to generate reference indexes when needed
Jim Johnson <jj@umn.edu>
parents:
diff
changeset
|
241 |