Mercurial > repos > peterjc > mira4_assembler
diff tools/mira4/mira4.py @ 4:df86ed992a1b draft
Uploaded preview 4, lots of work on mapping
author | peterjc |
---|---|
date | Fri, 11 Oct 2013 04:28:45 -0400 |
parents | 32f693f6e741 |
children | ffefb87bd414 |
line wrap: on
line diff
--- a/tools/mira4/mira4.py Thu Sep 26 12:30:08 2013 -0400 +++ b/tools/mira4/mira4.py Fri Oct 11 04:28:45 2013 -0400 @@ -31,7 +31,7 @@ return ver.split("\n", 1)[0] -os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc2_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"] +os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc3_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"] mira_binary = "mira" mira_ver = get_version(mira_binary) if not mira_ver.strip().startswith("4.0"): @@ -51,41 +51,7 @@ sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) -def massage_symlinks(manifest): - """Create FASTQ aliases and edit the manifest to use them. - - Short term measure for MIRA 4.0RC2 which depends on data file - extensions to decide the file format, and doesn't like *.dat - as used in Galaxy. - """ - base = os.path.split(manifest)[0] - with open(manifest) as h: - lines = h.readlines() - f = 0 - for i, line in enumerate(lines): - if not line.startswith("data ="): - continue - #Assumes no spaces in filename, would they have to be escaped? - new_line = "data =" - for filename in line[6:].strip().split(): - if not filename: - continue - assert os.path.isfile(filename), filename - f += 1 - alias = os.path.join(base, "input%i.fastq" % f) - new_line += " " + alias - cmd = "ln -s %s %s" % (filename, alias) - if os.system(cmd): - stop_err("Problem creating FASTQ alias:\n%s" % cmd) - lines[i] = new_line + "\n" - with open(manifest, "w") as h: - for line in lines: - #sys.stderr.write(line) - h.write(line) - return True - - -def collect_output(temp, name): +def collect_output(temp, name, handle): n3 = (temp, name, name, name) f = "%s/%s_assembly/%s_d_results" % (temp, name, name) if not os.path.isdir(f): @@ -95,16 +61,34 @@ log_manifest(manifest) stop_err("Empty output folder") missing = [] - for old, new in [("%s/%s_out.maf" % (f, name), out_maf), - ("%s/%s_out.unpadded.fasta" % (f, name), out_fasta)]: + + old_maf = "%s/%s_out.maf" % (f, name) + if not os.path.isfile(old_maf): + #Triggered extractLargeContigs.sh? + old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) + + #De novo or single strain mapping, + old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) + if not os.path.isfile(old_fasta): + #Mapping (currently StrainX versus reference) + old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) + if not os.path.isfile(old_fasta): + #Triggered extractLargeContigs.sh? + old_fasta = "%s/%s_LargeContigs_out.fasta" % (f, name) + + missing = False + for old, new in [(old_maf, out_maf), + (old_fasta, out_fasta)]: if not os.path.isfile(old): - missing.append(os.path.splitext(old)[-1]) + missing = True else: + handle.write("Capturing %s\n" % old) shutil.move(old, new) if missing: log_manifest(manifest) - sys.stderr.write("Contents of %r: %r\n" % (f, os.listdir(f))) - stop_err("Missing output files: %s" % ", ".join(missing)) + sys.stderr.write("Contents of %r:\n" % f) + for filename in sorted(os.listdir(f)): + sys.stderr.write("%s\n" % filename) def clean_up(temp, name): folder = "%s/%s_assembly" % (temp, name) @@ -119,9 +103,6 @@ name = "MIRA" manifest, out_maf, out_fasta, out_log = sys.argv[1:5] -#Hack until MIRA v4 lets us specify file format explicitly, -massage_symlinks(manifest) - start_time = time.time() #cmd_list =sys.argv[8:] cmd_list = [mira_binary, manifest] @@ -142,6 +123,15 @@ #print cmd handle = open(out_log, "w") +handle.write("======================== MIRA manifest (instructions) ========================\n") +m = open(manifest, "rU") +for line in m: + handle.write(line) +m.close() +del m +handle.write("\n") +handle.write("============================ Starting MIRA now ===============================\n") +handle.flush() try: #Run MIRA child = subprocess.Popen(cmd_list, @@ -159,8 +149,10 @@ assert not stdout and not stderr #Should be empty as sent to handle run_time = time.time() - start_time return_code = child.returncode -handle.write("\n\nMIRA took %0.2f minutes\n" % (run_time / 60.0)) -print "MIRA took %0.2f minutes" % (run_time / 60.0) +handle.write("\n") +handle.write("============================ MIRA has finished ===============================\n") +handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) +print "MIRA took %0.2f hours" % (run_time / 3600.0) if return_code: handle.write("Return error code %i from command:\n" % return_code) handle.write(cmd + "\n") @@ -169,12 +161,30 @@ log_manifest(manifest) stop_err("Return error code %i from command:\n%s" % (return_code, cmd), return_code) -handle.close() +handle.flush() + +if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): + handle.write("\n") + handle.write("====================== Extract Large Contigs failed ==========================\n") + e = open("MIRA_assembly/MIRA_d_results/ec.log", "rU") + for line in e: + handle.write(line) + e.close() + handle.write("============================ (end of ec.log) =================================\n") + handle.flush() #print "Collecting output..." -collect_output(temp, name) +collect_output(temp, name, handle) + +if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): + #Treat as an error, but doing this AFTER collect_output + sys.stderr.write("Extract Large Contigs failed\n") + handle.write("Extract Large Contigs failed\n") + handle.close() + sys.exit(1) #print "Cleaning up..." clean_up(temp, name) -print "Done" +handle.close() +print("Done")