Mercurial > repos > peterjc > mira4_assembler
changeset 20:aeb3e35f8236 draft
Uploaded v0.0.4 preview, made MAF and BAM output optional
author | peterjc |
---|---|
date | Tue, 10 Jun 2014 10:11:58 -0400 |
parents | 8487d70e82aa |
children | 4abe8d59a438 |
files | tools/mira4/README.rst tools/mira4/mira4.py tools/mira4/mira4_de_novo.xml tools/mira4/mira4_mapping.xml tools/mira4/repository_dependencies.xml tools/mira4/tool_dependencies.xml |
diffstat | 6 files changed, 97 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/mira4/README.rst Wed May 21 06:56:06 2014 -0400
+++ b/tools/mira4/README.rst Tue Jun 10 10:11:58 2014 -0400
@@ -91,6 +91,8 @@
           (``$TMPDIR``, ``$TEMP``, or ``$TMP`` in that order). This was
           previously hard coded as ``/tmp``.
 v0.0.3  - Updated to target MIRA 4.0.2
+v0.0.4  - Using optparse for the Python wrapper script API
+        - Made MAF and BAM outputs optional
 ======= ======================================================================
 
 
--- a/tools/mira4/mira4.py Wed May 21 06:56:06 2014 -0400
+++ b/tools/mira4/mira4.py Tue Jun 10 10:11:58 2014 -0400
@@ -7,11 +7,12 @@
 import shutil
 import time
 import tempfile
+from optparse import OptionParser
 
 #Do we need any PYTHONPATH magic?
 from mira4_make_bam import make_bam
 
-WRAPPER_VER = "0.0.1" #Keep in sync with the XML file
+WRAPPER_VER = "0.0.4" #Keep in sync with the XML file
 
 def stop_err(msg, err=1):
     sys.stderr.write(msg+"\n")
@@ -34,6 +35,35 @@
     del child
     return ver.split("\n", 1)[0].strip()
 
+#Parse Command Line
+usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
+
+$ python mira4.py ...
+
+This will run the MIRA binary and collect its output files as directed.
+""" % WRAPPER_VER
+parser = OptionParser(usage=usage)
+parser.add_option("-m", "--manifest", dest="manifest",
+                  default=None, metavar="FILE",
+                  help="MIRA manifest filename")
+parser.add_option("--maf", dest="maf",
+                  default="-", metavar="FILE",
+                  help="MIRA MAF output filename")
+parser.add_option("--bam", dest="bam",
+                  default="-", metavar="FILE",
+                  help="Unpadded BAM output filename")
+parser.add_option("--fasta", dest="fasta",
+                  default="-", metavar="FILE",
+                  help="Unpadded FASTA output filename")
+parser.add_option("--log", dest="log",
+                  default="-", metavar="FILE",
+                  help="MIRA logging output filename")
+options, args = parser.parse_args()
+manifest = options.manifest
+out_maf = options.maf
+out_bam = options.bam
+out_fasta = options.fasta
+out_log = options.log
 
 try:
     mira_path = os.environ["MIRA4"]
@@ -60,6 +90,11 @@
         print "WARNING: miraconvert %s" % mira_convert_ver
     sys.exit(0)
 
+if not manifest:
+    stop_err("Manifest is required")
+elif not os.path.isfile(manifest):
+    stop_err("Missing input MIRA manifest file: %r" % manifest)
+
 
 try:
     threads = int(os.environ.get("GALAXY_SLOTS", "1"))
@@ -143,6 +178,8 @@
                      (old_fasta, out_fasta)]:
         if not os.path.isfile(old):
             missing = True
+        elif not new or new == "-":
+            handle.write("Ignoring %s\n" % old)
         else:
             handle.write("Capturing %s\n" % old)
             shutil.move(old, new)
@@ -154,9 +191,10 @@
 
     #For mapping mode, probably most people would expect a BAM file
     #using the reference FASTA file...
-    msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
-    if msg:
-        stop_err(msg)
+    if out_bam and out_bam != "-":
+        msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
+        if msg:
+            stop_err(msg)
 
 def clean_up(temp, name):
     folder = "%s/%s_assembly" % (temp, name)
@@ -167,14 +205,12 @@
 #Currently Galaxy puts us somewhere safe like:
 #/opt/galaxy-dist/database/job_working_directory/846/
 temp = "."
-#name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8]
+
 name = "MIRA"
-manifest, out_maf, out_bam, out_fasta, out_log = sys.argv[1:]
 
 override_temp(manifest)
 
 start_time = time.time()
-#cmd_list =sys.argv[8:]
 cmd_list = [mira_binary, "-t", str(threads), manifest]
 cmd = " ".join(cmd_list)
 
@@ -192,7 +228,10 @@
 #print os.path.abspath(".")
 #print cmd
 
-handle = open(out_log, "w")
+if out_log and out_log != "-":
+    handle = open(out_log, "w")
+else:
+    handle = open(os.devnull, "w")
 handle.write("======================== MIRA manifest (instructions) ========================\n")
 m = open(manifest, "rU")
 for line in m:
--- a/tools/mira4/mira4_de_novo.xml Wed May 21 06:56:06 2014 -0400
+++ b/tools/mira4/mira4_de_novo.xml Tue Jun 10 10:11:58 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assember" version="0.0.3">
+<tool id="mira_4_0_de_novo" name="MIRA v4.0 de novo assember" version="0.0.4">
     <description>Takes Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>
     <requirements>
         <requirement type="binary">mira</requirement>
@@ -8,8 +8,16 @@
         <requirement type="package" version="0.1.19">samtools</requirement>
     </requirements>
     <version_command interpreter="python">mira4.py --version</version_command>
-    <command interpreter="python">
-mira4.py "$manifest" "$out_maf" "$out_bam" "$out_fasta" "$out_log"
+    <command interpreter="python">mira4.py
+--manifest "$manifest"
+#if str($maf_wanted)=="true":
+--maf "$out_maf"
+#end if
+#if str($bam_wanted)=="true":
+--bam "$out_bam"
+#end if
+--fasta "$out_fasta"
+--log "$out_log"
     </command>
     <stdio>
         <!-- Assume anything other than zero is an error -->
@@ -67,12 +75,18 @@
             <param name="filenames" type="data" format="fastq,mira" multiple="true" required="true" label="Read file(s)"
                    help="Multiple files allowed, for example paired reads can be given as two files (MIRA looks at read names to identify pairs)." />
         </repeat>
+        <param name="maf_wanted" type="boolean" label="Output assembly in MIRA's own format?" checked="False" />
+        <param name="bam_wanted" type="boolean" label="Convert assembly into BAM format?" checked="True" />
     </inputs>
     <code file="mira4_validator.py" />
     <outputs>
         <data name="out_fasta" format="fasta" label="MIRA de novo contigs (FASTA)" />
-        <data name="out_bam" format="bam" label="MIRA de novo assembly (BAM)" />
-        <data name="out_maf" format="mira" label="MIRA de novo assembly" />
+        <data name="out_bam" format="bam" label="MIRA de novo assembly (BAM)">
+            <filter>bam_wanted is True</filter>
+        </data>
+        <data name="out_maf" format="mira" label="MIRA de novo assembly">
+            <filter>maf_wanted is True</filter>
+        </data>
         <!-- TODO?
         <data name="out_contigstats" format="tabular" label="MIRA contig stats" />
         -->
@@ -162,6 +176,8 @@
             <param name="technology" value="sanger" />
             <param name="type" value="none" />
             <param name="filenames" value="U13small_m.fastq" ftype="fastqsanger" />
+            <param name="maf_wanted" value="true"/>
+            <param name="bam_wanted" value="true"/>
             <output name="out_fasta" file="U13small_m.mira4_de_novo.fasta" ftype="fasta" />
             <output name="out_bam" file="empty_file.dat" compare="contains" />
             <output name="out_maf" file="empty_file.dat" compare="contains" />
@@ -176,6 +192,8 @@
             <param name="job_quality" value="accurate" />
             <param name="type" value="none" />
             <param name="filenames" value="ecoli.fastq" ftype="fastqsanger" />
+            <param name="maf_wanted" value="true"/>
+            <param name="bam_wanted" value="true"/>
             <output name="out_fasta" file="ecoli.mira4_de_novo.fasta" ftype="fasta" />
             <output name="out_bam" file="empty_file.dat" compare="contains" />
             <output name="out_maf" file="empty_file.dat" compare="contains" />
--- a/tools/mira4/mira4_mapping.xml Wed May 21 06:56:06 2014 -0400
+++ b/tools/mira4/mira4_mapping.xml Tue Jun 10 10:11:58 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.3">
+<tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.4">
     <description>Maps Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>
     <requirements>
         <requirement type="binary">mira</requirement>
@@ -8,8 +8,16 @@
         <requirement type="package" version="0.1.19">samtools</requirement>
     </requirements>
     <version_command interpreter="python">mira4.py --version</version_command>
-    <command interpreter="python">
-mira4.py "$manifest" "$out_maf" "$out_bam" "$out_fasta" "$out_log"
+    <command interpreter="python">mira4.py
+--manifest "$manifest"
+#if str($maf_wanted) == "true":
+--maf "$out_maf"
+#end if
+#if str($bam_wanted) == "true":
+--bam "$out_bam"
+#end if
+--fasta "$out_fasta"
+--log "$out_log"
     </command>
     <stdio>
         <!-- Assume anything other than zero is an error -->
@@ -69,11 +77,17 @@
             <param name="filenames" type="data" format="fastq,mira" multiple="true" required="true" label="Read file(s)"
                    help="Multiple files allowed, for example paired reads can be given as two files (MIRA looks at read names to identify pairs)." />
         </repeat>
+        <param name="maf_wanted" type="boolean" label="Output mapping in MIRA's own format?" checked="False" />
+        <param name="bam_wanted" type="boolean" label="Convert mapping into BAM format?" checked="True" />
     </inputs>
     <outputs>
         <data name="out_fasta" format="fasta" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping contigs (FASTA)" />
-        <data name="out_bam" format="bam" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping assembly (BAM)" />
-        <data name="out_maf" format="mira" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping assembly" />
+        <data name="out_bam" format="bam" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping assembly (BAM)">
+            <filter>bam_wanted is True</filter>
+        </data>
+        <data name="out_maf" format="mira" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping assembly">
+            <filter>maf_wanted is True</filter>
+        </data>
         <data name="out_log" format="txt" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping log" />
     </outputs>
     <configfiles>
@@ -168,6 +182,8 @@
             <param name="strain_setup" value="default" />
             <param name="type" value="none" />
             <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />
+            <param name="maf_wanted" value="true"/>
+            <param name="bam_wanted" value="true"/>
             <output name="out_fasta" file="tvc_map_ref_strain.fasta" ftype="fasta" />
             <output name="out_bam" file="empty_file.dat" compare="contains" />
             <output name="out_maf" file="empty_file.dat" compare="contains" />
@@ -180,6 +196,8 @@
             <param name="strain_setup" value="same" />
             <param name="type" value="none" />
             <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />
+            <param name="maf_wanted" value="true"/>
+            <param name="bam_wanted" value="true"/>
             <output name="out_fasta" file="tvc_map_same_strain.fasta" ftype="fasta" />
             <output name="out_bam" file="empty_file.dat" compare="contains" />
             <output name="out_maf" file="empty_file.dat" compare="contains" />
--- a/tools/mira4/repository_dependencies.xml Wed May 21 06:56:06 2014 -0400
+++ b/tools/mira4/repository_dependencies.xml Tue Jun 10 10:11:58 2014 -0400
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="This requires the MIRA datatype definitions (e.g. the MIRA Assembly Format).">
-    <repository changeset_revision="29ff2a2b6eaa" name="mira_datatypes" owner="peterjc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+    <repository changeset_revision="29ff2a2b6eaa" name="mira_datatypes" owner="peterjc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
 </repositories>
--- a/tools/mira4/tool_dependencies.xml Wed May 21 06:56:06 2014 -0400
+++ b/tools/mira4/tool_dependencies.xml Tue Jun 10 10:11:58 2014 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="samtools" version="0.1.19">
-        <repository changeset_revision="40250a414486" name="package_samtools_0_1_19" owner="iuc" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="40250a414486" name="package_samtools_0_1_19" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
     <package name="MIRA" version="4.0">
         <install version="1.0">