annotate tools/mira4_0/mira4_make_bam.py @ 39:bbf14bb9607b draft default tip

planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
author peterjc
date Mon, 03 Jun 2019 13:29:00 -0400
parents cee8f9005e43
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
38
cee8f9005e43 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 206259620376b322fc8ed99a6efdd3712f38764b
peterjc
parents: 35
diff changeset
2 """Wrapper script using miraconvert & samtools to get BAM from MIRA."""
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
3
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
4 import os
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
5 import shutil
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
6 import subprocess
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
7 import sys
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
8 import tempfile
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
9
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
10
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
11 def run(cmd, log_handle):
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
12 try:
39
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
13 child = subprocess.Popen(
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
14 cmd,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
15 shell=True,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
16 universal_newlines=True,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
17 stdout=subprocess.PIPE,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
18 stderr=subprocess.STDOUT,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
19 )
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
20 except Exception as err:
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
21 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
22 # TODO - call clean up?
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
23 log_handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
24 sys.exit(1)
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
25 # Use .communicate as can get deadlocks with .wait(),
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
26 stdout, stderr = child.communicate()
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
27 assert not stderr # Should be empty as sent to stdout
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
28 if len(stdout) > 10000:
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
29 # miraconvert can be very verbose (is holding stdout in RAM a problem?)
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
30 stdout = stdout.split("\n")
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
31 stdout = stdout[:10] + ["...", "<snip>", "..."] + stdout[-10:]
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
32 stdout = "\n".join(stdout)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
33 log_handle.write(stdout)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
34 return child.returncode
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
35
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
36
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
37 def depad(fasta_file, sam_file, bam_file, log_handle):
39
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
38 log_handle.write(
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
39 "\n================= Converting MIRA assembly from SAM to BAM ===================\n" # noqa: E501
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
40 )
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
41 # Also doing SAM to (uncompressed) BAM during depad
39
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
42 bam_stem = (
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
43 bam_file + ".tmp"
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
44 ) # Have write permissions and want final file in this folder
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
45 cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
46 fasta_file,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
47 sam_file,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
48 bam_stem,
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
49 )
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
50 return_code = run(cmd, log_handle)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
51 if return_code:
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
52 return "Error %i from command:\n%s" % (return_code, cmd)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
53 if not os.path.isfile(bam_stem + ".bam"):
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
54 return "samtools depad or sort failed to produce BAM file"
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
55
39
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
56 log_handle.write(
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
57 "\n====================== Indexing MIRA assembly BAM file =======================\n" # noqa: E501
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
58 )
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
59 cmd = 'samtools index "%s.bam"' % bam_stem
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
60 return_code = run(cmd, log_handle)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
61 if return_code:
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
62 return "Error %i from command:\n%s" % (return_code, cmd)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
63 if not os.path.isfile(bam_stem + ".bam.bai"):
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
64 return "samtools indexing of BAM file failed to produce BAI file"
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
65
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
66 shutil.move(bam_stem + ".bam", bam_file)
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
67 os.remove(bam_stem + ".bam.bai") # Let Galaxy handle that...
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
68
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
69
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
70 def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle):
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
71 if not os.path.isfile(maf_file):
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
72 return "Missing input MIRA file: %r" % maf_file
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
73 if not os.path.isfile(fasta_file):
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
74 return "Missing padded FASTA file: %r" % fasta_file
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
75
39
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
76 log_handle.write(
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
77 "\n====================== Converting MIRA assembly to SAM =======================\n" # noqa: E501
bbf14bb9607b planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
peterjc
parents: 38
diff changeset
78 )
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
79 tmp_dir = tempfile.mkdtemp()
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
80 sam_file = os.path.join(tmp_dir, "x.sam")
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
81
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
82 # Note add nbb to the template name, possible MIRA 4.0 RC4 bug
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
83 cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
84 return_code = run(cmd, log_handle)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
85 if return_code:
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
86 return "Error %i from command:\n%s" % (return_code, cmd)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
87 if not os.path.isfile(sam_file):
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
88 return "Conversion from MIRA to SAM failed"
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
89
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
90 # Also doing SAM to (uncompressed) BAM during depad
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
91 msg = depad(fasta_file, sam_file, bam_file, log_handle)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
92 if msg:
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
93 return msg
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
94
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
95 os.remove(sam_file)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
96 os.rmdir(tmp_dir)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
97
32
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
98 return None # Good :)
56b421d59805 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents: 31
diff changeset
99
25
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
100
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
101 if __name__ == "__main__":
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
102 mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:]
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
103 msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout)
55ae131c5862 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
104 if msg:
31
fd95aaef8818 planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit bc3d484c5cd68ddcf456db2fff489d584aa2034c
peterjc
parents: 25
diff changeset
105 sys.exit(msg)