diff tools/mira4/mira4.py @ 4:df86ed992a1b draft

Uploaded preview 4, lots of work on mapping
author peterjc
date Fri, 11 Oct 2013 04:28:45 -0400
parents 32f693f6e741
children ffefb87bd414
line wrap: on
line diff
--- a/tools/mira4/mira4.py	Thu Sep 26 12:30:08 2013 -0400
+++ b/tools/mira4/mira4.py	Fri Oct 11 04:28:45 2013 -0400
@@ -31,7 +31,7 @@
     return ver.split("\n", 1)[0]
 
 
-os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc2_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"]
+os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc3_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"]
 mira_binary = "mira"
 mira_ver = get_version(mira_binary)
 if not mira_ver.strip().startswith("4.0"):
@@ -51,41 +51,7 @@
     sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60))
 
 
-def massage_symlinks(manifest):
-    """Create FASTQ aliases and edit the manifest to use them.
-
-    Short term measure for MIRA 4.0RC2 which depends on data file
-    extensions to decide the file format, and doesn't like *.dat
-    as used in Galaxy.
-    """
-    base = os.path.split(manifest)[0]
-    with open(manifest) as h:
-        lines = h.readlines()
-    f = 0
-    for i, line in enumerate(lines):
-         if not line.startswith("data ="):
-             continue
-         #Assumes no spaces in filename, would they have to be escaped?
-         new_line = "data ="
-         for filename in line[6:].strip().split():
-             if not filename:
-                 continue
-             assert os.path.isfile(filename), filename
-             f += 1
-             alias = os.path.join(base, "input%i.fastq" % f)
-             new_line += " " + alias
-             cmd = "ln -s %s %s" % (filename, alias)
-             if os.system(cmd):
-                 stop_err("Problem creating FASTQ alias:\n%s" % cmd)
-         lines[i] = new_line + "\n"
-    with open(manifest, "w") as h:
-        for line in lines:
-            #sys.stderr.write(line)
-            h.write(line)
-    return True
-
-
-def collect_output(temp, name):
+def collect_output(temp, name, handle):
     n3 = (temp, name, name, name)
     f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
     if not os.path.isdir(f):
@@ -95,16 +61,34 @@
         log_manifest(manifest)
         stop_err("Empty output folder")
     missing = []
-    for old, new in [("%s/%s_out.maf" % (f, name), out_maf),
-                     ("%s/%s_out.unpadded.fasta" % (f, name), out_fasta)]:
+
+    old_maf = "%s/%s_out.maf" % (f, name)
+    if not os.path.isfile(old_maf):
+        #Triggered extractLargeContigs.sh?
+        old_maf = "%s/%s_LargeContigs_out.maf" % (f, name)
+
+    #De novo or single strain mapping,
+    old_fasta = "%s/%s_out.unpadded.fasta" % (f, name)
+    if not os.path.isfile(old_fasta):
+        #Mapping (currently StrainX versus reference)
+        old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name)
+    if not os.path.isfile(old_fasta):
+        #Triggered extractLargeContigs.sh?
+        old_fasta = "%s/%s_LargeContigs_out.fasta" % (f, name)
+
+    missing = False
+    for old, new in [(old_maf, out_maf),
+                     (old_fasta, out_fasta)]:
         if not os.path.isfile(old):
-            missing.append(os.path.splitext(old)[-1])
+            missing = True
         else:
+            handle.write("Capturing %s\n" % old)
             shutil.move(old, new)
     if missing:
         log_manifest(manifest)
-        sys.stderr.write("Contents of %r: %r\n" % (f, os.listdir(f)))
-        stop_err("Missing output files: %s" % ", ".join(missing))
+        sys.stderr.write("Contents of %r:\n" % f)
+        for filename in sorted(os.listdir(f)):
+            sys.stderr.write("%s\n" % filename)
 
 def clean_up(temp, name):
     folder = "%s/%s_assembly" % (temp, name)
@@ -119,9 +103,6 @@
 name = "MIRA"
 manifest, out_maf, out_fasta, out_log = sys.argv[1:5]
 
-#Hack until MIRA v4 lets us specify file format explicitly,
-massage_symlinks(manifest)
-
 start_time = time.time()
 #cmd_list =sys.argv[8:]
 cmd_list = [mira_binary, manifest]
@@ -142,6 +123,15 @@
 #print cmd
 
 handle = open(out_log, "w")
+handle.write("======================== MIRA manifest (instructions) ========================\n")
+m = open(manifest, "rU")
+for line in m:
+    handle.write(line)
+m.close()
+del m
+handle.write("\n")
+handle.write("============================ Starting MIRA now ===============================\n")
+handle.flush()
 try:
     #Run MIRA
     child = subprocess.Popen(cmd_list,
@@ -159,8 +149,10 @@
 assert not stdout and not stderr #Should be empty as sent to handle
 run_time = time.time() - start_time
 return_code = child.returncode
-handle.write("\n\nMIRA took %0.2f minutes\n" % (run_time / 60.0))
-print "MIRA took %0.2f minutes" % (run_time / 60.0)
+handle.write("\n")
+handle.write("============================ MIRA has finished ===============================\n")
+handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0))
+print "MIRA took %0.2f hours" % (run_time / 3600.0)
 if return_code:
     handle.write("Return error code %i from command:\n" % return_code)
     handle.write(cmd + "\n")
@@ -169,12 +161,30 @@
     log_manifest(manifest)
     stop_err("Return error code %i from command:\n%s" % (return_code, cmd),
              return_code)
-handle.close()
+handle.flush()
+
+if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
+    handle.write("\n")
+    handle.write("====================== Extract Large Contigs failed ==========================\n")
+    e = open("MIRA_assembly/MIRA_d_results/ec.log", "rU")
+    for line in e:
+        handle.write(line)
+    e.close()
+    handle.write("============================ (end of ec.log) =================================\n")
+    handle.flush()
 
 #print "Collecting output..."
-collect_output(temp, name)
+collect_output(temp, name, handle)
+
+if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
+    #Treat as an error, but doing this AFTER collect_output
+    sys.stderr.write("Extract Large Contigs failed\n")
+    handle.write("Extract Large Contigs failed\n")
+    handle.close()
+    sys.exit(1)
 
 #print "Cleaning up..."
 clean_up(temp, name)
 
-print "Done"
+handle.close()
+print("Done")