annotate tools/mira4/mira4_make_bam.py @ 15:b0ffe0e7282b draft

Uploaded v0.0.2 preview 7, fixed bash syntax error
author peterjc
date Thu, 20 Feb 2014 05:07:19 -0500
parents 302d13490b23
children 4abe8d59a438
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
2 """Wrapper script using miraconvert & samtools to get BAM from MIRA.
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
3 """
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
4 import os
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
5 import sys
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
6 import shutil
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
7 import subprocess
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
8 import tempfile
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
9
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
10 def stop_err(msg, err=1):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
11 sys.stderr.write(msg+"\n")
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
12 sys.exit(err)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
13
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
14 def run(cmd, log_handle):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
15 try:
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
16 child = subprocess.Popen(cmd, shell=True,
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
17 stdout=subprocess.PIPE,
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
18 stderr=subprocess.STDOUT)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
19 except Exception, err:
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
20 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
21 #TODO - call clean up?
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
22 log_handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
23 sys.exit(1)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
24 #Use .communicate as can get deadlocks with .wait(),
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
25 stdout, stderr = child.communicate()
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
26 assert not stderr #Should be empty as sent to stdout
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
27 if len(stdout) > 10000:
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
28 #miraconvert can be very verbose (is holding stdout in RAM a problem?)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
29 stdout = stdout.split("\n")
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
30 stdout = stdout[:10] + ["...", "<snip>", "..."] + stdout[-10:]
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
31 stdout = "\n".join(stdout)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
32 log_handle.write(stdout)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
33 return child.returncode
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
34
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
35 def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
36 if not os.path.isfile(mira_convert):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
37 return "Missing binary %r" % mira_convert
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
38 if not os.path.isfile(maf_file):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
39 return "Missing input MIRA file: %r" % maf_file
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
40 if not os.path.isfile(fasta_file):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
41 return "Missing padded FASTA file: %r" % fasta_file
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
42
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
43 log_handle.write("\n====================== Converting MIRA assembly to SAM =======================\n")
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
44 tmp_dir = tempfile.mkdtemp()
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
45 sam_file = os.path.join(tmp_dir, "x.sam")
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
46
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
47 # Note add nbb to the template name, possible MIRA 4.0 RC4 bug
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
48 cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
49 return_code = run(cmd, log_handle)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
50 if return_code:
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
51 return "Error %i from command:\n%s" % (return_code, cmd)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
52 if not os.path.isfile(sam_file):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
53 return "Conversion from MIRA to SAM failed"
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
54
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
55 log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n")
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
56 #Also doing SAM to (uncompressed) BAM during depad
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
57 bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
58 cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
59 return_code = run(cmd, log_handle)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
60 if return_code:
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
61 return "Error %i from command:\n%s" % (return_code, cmd)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
62 if not os.path.isfile(bam_stem + ".bam"):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
63 return "samtools depad or sort failed to produce BAM file"
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
64
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
65 os.remove(sam_file)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
66 os.rmdir(tmp_dir)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
67
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
68 log_handle.write("\n====================== Indexing MIRA assembly BAM file =======================\n")
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
69 cmd = 'samtools index "%s.bam"' % bam_stem
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
70 return_code = run(cmd, log_handle)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
71 if return_code:
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
72 return "Error %i from command:\n%s" % (return_code, cmd)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
73 if not os.path.isfile(bam_stem + ".bam.bai"):
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
74 return "samtools indexing of BAM file failed to produce BAI file"
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
75
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
76 shutil.move(bam_stem + ".bam", bam_file)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
77 os.remove(bam_stem + ".bam.bai") #Let Galaxy handle that...
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
78
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
79 return None #Good :)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
80
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
81 if __name__ == "__main__":
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
82 mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:]
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
83 msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout)
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
84 if msg:
302d13490b23 Uploaded v0.0.2 preview 1, BAM output
peterjc
parents:
diff changeset
85 stop_err(msg)