view tools/mira4_0/mira4_make_bam.py @ 39:bbf14bb9607b draft default tip

planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
author peterjc
date Mon, 03 Jun 2019 13:29:00 -0400
parents cee8f9005e43
children
line wrap: on
line source

#!/usr/bin/env python
"""Wrapper script using miraconvert & samtools to get BAM from MIRA."""

import os
import shutil
import subprocess
import sys
import tempfile


def run(cmd, log_handle):
    try:
        child = subprocess.Popen(
            cmd,
            shell=True,
            universal_newlines=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
        # TODO - call clean up?
        log_handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
        sys.exit(1)
    # Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    assert not stderr  # Should be empty as sent to stdout
    if len(stdout) > 10000:
        # miraconvert can be very verbose (is holding stdout in RAM a problem?)
        stdout = stdout.split("\n")
        stdout = stdout[:10] + ["...", "<snip>", "..."] + stdout[-10:]
        stdout = "\n".join(stdout)
    log_handle.write(stdout)
    return child.returncode


def depad(fasta_file, sam_file, bam_file, log_handle):
    log_handle.write(
        "\n================= Converting MIRA assembly from SAM to BAM ===================\n"  # noqa: E501
    )
    # Also doing SAM to (uncompressed) BAM during depad
    bam_stem = (
        bam_file + ".tmp"
    )  # Have write permissions and want final file in this folder
    cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (
        fasta_file,
        sam_file,
        bam_stem,
    )
    return_code = run(cmd, log_handle)
    if return_code:
        return "Error %i from command:\n%s" % (return_code, cmd)
    if not os.path.isfile(bam_stem + ".bam"):
        return "samtools depad or sort failed to produce BAM file"

    log_handle.write(
        "\n====================== Indexing MIRA assembly BAM file =======================\n"  # noqa: E501
    )
    cmd = 'samtools index "%s.bam"' % bam_stem
    return_code = run(cmd, log_handle)
    if return_code:
        return "Error %i from command:\n%s" % (return_code, cmd)
    if not os.path.isfile(bam_stem + ".bam.bai"):
        return "samtools indexing of BAM file failed to produce BAI file"

    shutil.move(bam_stem + ".bam", bam_file)
    os.remove(bam_stem + ".bam.bai")  # Let Galaxy handle that...


def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle):
    if not os.path.isfile(maf_file):
        return "Missing input MIRA file: %r" % maf_file
    if not os.path.isfile(fasta_file):
        return "Missing padded FASTA file: %r" % fasta_file

    log_handle.write(
        "\n====================== Converting MIRA assembly to SAM =======================\n"  # noqa: E501
    )
    tmp_dir = tempfile.mkdtemp()
    sam_file = os.path.join(tmp_dir, "x.sam")

    # Note add nbb to the template name, possible MIRA 4.0 RC4 bug
    cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file)
    return_code = run(cmd, log_handle)
    if return_code:
        return "Error %i from command:\n%s" % (return_code, cmd)
    if not os.path.isfile(sam_file):
        return "Conversion from MIRA to SAM failed"

    # Also doing SAM to (uncompressed) BAM during depad
    msg = depad(fasta_file, sam_file, bam_file, log_handle)
    if msg:
        return msg

    os.remove(sam_file)
    os.rmdir(tmp_dir)

    return None  # Good :)


if __name__ == "__main__":
    mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:]
    msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout)
    if msg:
        sys.exit(msg)