view tools/mummer/mummer.py @ 19:9ad1440b5003 draft default tip

planemo upload for repository https://github.com/peterjc/pico_galaxy/tree/master/tools/mummer commit e2fa1c599b6670b418479447fe5a181a97a6c834-dirty
author peterjc
date Fri, 15 Sep 2017 10:25:42 -0400
parents 0b38302b09c7
children
line wrap: on
line source

#!/usr/bin/env python
"""MUMmer wrapper calling nucmer/promer/mummerplot etc,

Takes the following command line options,
1. FASTA filename of species A
2. FASTA filename of species B
3. Algorithm, nucmer or promer
4. PNG output filename
5. PDF output filename
"""

import os
import shutil
import sys
import tempfile


def run(cmd):
    print(cmd)
    return_code = os.system(cmd)
    if return_code:
        sys.exit("Error %i from: %s" % (return_code, cmd))


if "-v" in sys.argv[1:]or "--version" in sys.argv[1:]:
    print("MUMmer wrapper v0.0.7\n")
    # TODO - How to get a version string from the mummer binary?
    os.system("nucmer --version")
    os.system("promer --version")
    os.system("mummerplot --version")
    os.system("gnuplot --version")
    # TODO - Should we include "gs --version" as a proxy for ps2pdf?
    sys.exit(0)

# Parse Command Line
# TODO - optparse
try:
    fasta_a, fasta_b, algorithm, png_out, pdf_out = sys.argv[1:]
except ValueError:
    sys.exit("Expect 5 arguments, got %i" % (len(sys.argv) - 1))


valid_algo = ["mummer", "nucmer", "promer"]
if algorithm not in valid_algo:
    sys.exit("Invalid algorithm argument %r, should be: %s" % (algorithm, ", ".join(valid_algo)))

base_path = tempfile.mkdtemp()
prefix = os.path.join(base_path, "ref_qry")
coords = prefix + ".mums"
# gnuplot = prefix + ".gp"
ps_image = prefix + ".ps"
png_image = prefix + ".png"

if algorithm == "mummer":
    # Add -mum as per example to find maximal unique matches between ref and query.
    # Add the -b -c options to search both strands and report relative to forward strand
    # which then matches the default dual-strand approach in nucmer and promer
    cmd = '%s -mum -b -c "%s" "%s" > %s' % (algorithm, fasta_a, fasta_b, coords)
else:
    coords = "out.delta"
    cmd = '%s "%s" "%s"' % (algorithm, fasta_a, fasta_b)
run(cmd)

output_failed = False

# PNG
# ===
cmd = 'mummerplot -R "%s" -Q "%s" --png --large --prefix=%s %s' % (fasta_a, fasta_b, prefix, coords)
run(cmd)
if os.path.isfile(png_image):
    shutil.move(png_image, png_out)
else:
    sys.stderr.write("ERROR: PNG file not created.\n")
    output_failed = True

# PS --> PDF
# ==========
# Using --large, puts "set size 3,3" in the gnuplot - which seems to mess up.
# Problem here is the default bbox (BoundingBox) in the PS output is letter page size,
# and even if we override that, at least when view the PS output in Adobe Illustrator
# things don't seem to be lined up properly :(
#
# Using "set size 1,1" works better - which is what --small gives:
cmd = 'mummerplot -R "%s" -Q "%s" --postscript --small --prefix=%s %s' % (fasta_a, fasta_b, prefix, coords)
run(cmd)
if not os.path.isfile(ps_image):
    sys.stderr.write("ERROR: PostScript file needed for PDF output was not created.\n")
    output_failed = True
else:
    cmd = 'ps2pdf -dEPSCrop "%s" "%s"' % (ps_image, pdf_out)
    run(cmd)
    if not os.path.isfile(pdf_out):
        sys.stderr.write("ERROR: PDF file not created.\n")
        output_failed = True

# Remove temp files...
os.remove(coords)  # Might not be under the temp directory...
shutil.rmtree(base_path)

if output_failed:
    sys.exit("ERROR: Failed to produce output file(s).")