view tools/mira4_0/mira4_convert.py @ 39:bbf14bb9607b draft default tip

planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
author peterjc
date Mon, 03 Jun 2019 13:29:00 -0400 (2019-06-03)
parents cee8f9005e43
children
line wrap: on
line source
#!/usr/bin/env python
"""A simple wrapper script to call MIRA and collect its output.

This focuses on the miraconvert binary.
"""

from __future__ import print_function

import os
import shutil
import subprocess
import sys

from optparse import OptionParser

try:
    from io import BytesIO
except ImportError:
    # Should we worry about Python 2.5 or older?
    from StringIO import StringIO as BytesIO

# Do we need any PYTHONPATH magic?
from mira4_make_bam import depad

WRAPPER_VER = "0.0.11"  # Keep in sync with the XML file


def run(cmd):
    # Avoid using shell=True when we call subprocess to ensure if the Python
    # script is killed, so too is the child process.
    try:
        child = subprocess.Popen(
            cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except Exception as err:
        sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    # Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        cmd_str = " ".join(cmd)  # doesn't quote spaces etc
        if stderr and stdout:
            sys.exit(
                "Return code %i from command:\n%s\n\n%s\n\n%s"
                % (return_code, cmd_str, stdout, stderr)
            )
        else:
            sys.exit(
                "Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)
            )


def get_version(mira_binary):
    """Run MIRA to find its version number."""
    # At the commend line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
        sys.exit(1)
    ver, tmp = child.communicate()
    del child
    return ver.split("\n", 1)[0].strip()


# Parse Command Line
usage = (
    """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4_convert.py ...

This will run the MIRA miraconvert binary and collect its output files as directed.
"""
    % WRAPPER_VER
)
parser = OptionParser(usage=usage)
parser.add_option(
    "--input", dest="input", default=None, metavar="FILE", help="MIRA input filename"
)
parser.add_option(
    "-x", "--min_length", dest="min_length", default="0", help="Minimum contig length"
)
parser.add_option(
    "-y",
    "--min_cover",
    dest="min_cover",
    default="0",
    help="Minimum average contig coverage",
)
parser.add_option(
    "-z", "--min_reads", dest="min_reads", default="0", help="Minimum reads per contig"
)
parser.add_option(
    "--maf", dest="maf", default="", metavar="FILE", help="MIRA MAF output filename"
)
parser.add_option(
    "--ace", dest="ace", default="", metavar="FILE", help="ACE output filename"
)
parser.add_option(
    "--bam", dest="bam", default="", metavar="FILE", help="Unpadded BAM output filename"
)
parser.add_option(
    "--fasta",
    dest="fasta",
    default="",
    metavar="FILE",
    help="Unpadded FASTA output filename",
)
parser.add_option(
    "--cstats",
    dest="cstats",
    default="",
    metavar="FILE",
    help="Contig statistics filename",
)
parser.add_option(
    "-v",
    "--version",
    dest="version",
    default=False,
    action="store_true",
    help="Show version and quit",
)
options, args = parser.parse_args()
if args:
    sys.exit("Expected options (e.g. --input example.maf), not arguments")

input_maf = options.input
out_maf = options.maf
out_bam = options.bam
out_fasta = options.fasta
out_ace = options.ace
out_cstats = options.cstats

if "MIRA4" in os.environ:
    mira_path = os.environ["MIRA4"]
    mira_convert = os.path.join(mira_path, "miraconvert")
    if not os.path.isfile(mira_convert):
        sys.exit(
            "Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
            % (mira_convert, ", ".join(os.listdir(mira_path)))
        )
else:
    sys.stderr.write(
        "DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n"
    )
    mira_path = None
    mira_convert = "miraconvert"

mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
    sys.exit(
        "This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert)
    )
if options.version:
    print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
    sys.exit(0)

if not input_maf:
    sys.exit("Input MIRA file is required")
elif not os.path.isfile(input_maf):
    sys.exit("Missing input MIRA file: %r" % input_maf)

if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
    sys.exit("No output requested")


def check_min_int(value, name):
    try:
        i = int(value)
    except ValueError:
        sys.exit("Bad %s setting, %r" % (name, value))
    if i < 0:
        sys.exit("Negative %s setting, %r" % (name, value))
    return i


min_length = check_min_int(options.min_length, "minimum length")
min_cover = check_min_int(options.min_cover, "minimum cover")
min_reads = check_min_int(options.min_reads, "minimum reads")

# TODO - Run MIRA in /tmp or a configurable directory?
# Currently Galaxy puts us somewhere safe like:
# /opt/galaxy-dist/database/job_working_directory/846/
temp = "."


cmd_list = [mira_convert]
if min_length:
    cmd_list.extend(["-x", str(min_length)])
if min_cover:
    cmd_list.extend(["-y", str(min_cover)])
if min_reads:
    cmd_list.extend(["-z", str(min_reads)])
cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
if out_maf:
    cmd_list.append("maf")
if out_bam:
    cmd_list.append("samnbb")
    if not out_fasta:
        # Need this for samtools depad
        out_fasta = os.path.join(temp, "depadded.fasta")
if out_fasta:
    cmd_list.append("fasta")
if out_ace:
    cmd_list.append("ace")
if out_cstats:
    cmd_list.append("cstats")
run(cmd_list)


def collect(old, new):
    if not os.path.isfile(old):
        sys.exit("Missing expected output file %s" % old)
    shutil.move(old, new)


if out_maf:
    collect(os.path.join(temp, "converted.maf"), out_maf)
if out_fasta:
    # Can we look at the MAF file to see if there are multiple strains?
    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
    if os.path.isfile(old):
        collect(old, out_fasta)
    else:
        # Might the output be filtered down to zero contigs?
        old = os.path.join(temp, "converted.fasta")
        if not os.path.isfile(old):
            sys.exit("Missing expected output FASTA file")
        elif os.path.getsize(old) == 0:
            print("Warning - no contigs (harsh filters?)")
            collect(old, out_fasta)
        else:
            sys.exit("Missing expected output FASTA file (only generic file present)")
if out_ace:
    collect(os.path.join(temp, "converted.maf"), out_ace)
if out_cstats:
    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)

if out_bam:
    assert os.path.isfile(out_fasta)
    old = os.path.join(temp, "converted.samnbb")
    if not os.path.isfile(old):
        old = os.path.join(temp, "converted.sam")
    if not os.path.isfile(old):
        sys.exit("Missing expected intermediate file %s" % old)
    h = BytesIO()
    msg = depad(out_fasta, old, out_bam, h)
    if msg:
        print(msg)
        print(h.getvalue())
        h.close()
        sys.exit(1)
    h.close()
    if out_fasta == os.path.join(temp, "depadded.fasta"):
        # Not asked for by Galaxy, no longer needed
        os.remove(out_fasta)

if min_length or min_cover or min_reads:
    print("Filtered.")
else:
    print("Converted.")