Mercurial > repos > peterjc > seq_select_by_id
changeset 19:d66a0766f13e draft
planemo upload for repository https://github.com/peterjc/pico_galaxy/tree/master/tools/seq_select_by_id commit 5a5082a1ee1144c5b6ac9c2231239dbfe431e5ab
author | peterjc |
---|---|
date | Wed, 01 Feb 2017 10:51:12 -0500 |
parents | 67c0f7a86cf4 |
children | 6c5822581e41 |
files | tools/seq_select_by_id/README.rst tools/seq_select_by_id/seq_select_by_id.py tools/seq_select_by_id/seq_select_by_id.xml tools/seq_select_by_id/tool_dependencies.xml |
diffstat | 4 files changed, 32 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/seq_select_by_id/README.rst Thu May 21 10:55:50 2015 -0400 +++ b/tools/seq_select_by_id/README.rst Wed Feb 01 10:51:12 2017 -0500 @@ -1,7 +1,7 @@ Galaxy tool to select FASTA, QUAL, FASTQ or SFF sequences by ID =============================================================== -This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2011-2017 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below. @@ -85,6 +85,7 @@ - Reorder XML elements (internal change only). - Planemo for Tool Shed upload (``.shed.yml``, internal change only). - Quote filenames in case of spaces (internal change only). +v0.0.12 - Python style changes (internal change only). ======= ====================================================================== @@ -101,12 +102,12 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/ + $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/ ... or:: - $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/ + $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/ ... To just build and check the tar ball, use::
--- a/tools/seq_select_by_id/seq_select_by_id.py Thu May 21 10:55:50 2015 -0400 +++ b/tools/seq_select_by_id/seq_select_by_id.py Wed Feb 01 10:51:12 2017 -0500 @@ -16,51 +16,47 @@ molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3. http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878. -This script is copyright 2011-2013 by Peter Cock, The James Hutton Institute UK. +This script is copyright 2011-2017 by Peter Cock, The James Hutton Institute UK. All rights reserved. See accompanying text file for licence details (MIT license). """ import sys -def sys_exit(msg, err=1): - sys.stderr.write(msg.rstrip() + "\n") - sys.exit(err) - if "-v" in sys.argv or "--version" in sys.argv: - print "v0.0.9" + print "v0.0.12" sys.exit(0) -#Parse Command Line +# Parse Command Line try: tabular_file, col_arg, in_file, seq_format, out_file = sys.argv[1:] except ValueError: - sys_exit("Expected five arguments, got %i:\n%s" % (len(sys.argv)-1, " ".join(sys.argv))) + sys.exit("Expected five arguments, got %i:\n%s" % (len(sys.argv) - 1, " ".join(sys.argv))) try: if col_arg.startswith("c"): - column = int(col_arg[1:])-1 + column = int(col_arg[1:]) - 1 else: - column = int(col_arg)-1 + column = int(col_arg) - 1 except ValueError: - sys_exit("Expected column number, got %s" % col_arg) + sys.exit("Expected column number, got %s" % col_arg) if seq_format == "fastqcssanger": - sys_exit("Colorspace FASTQ not supported.") + sys.exit("Colorspace FASTQ not supported.") elif seq_format.lower() in ["sff", "fastq", "qual", "fasta"]: seq_format = seq_format.lower() elif seq_format.lower().startswith("fastq"): - #We don't care how the qualities are encoded + # We don't care how the qualities are encoded seq_format = "fastq" elif seq_format.lower().startswith("qual"): - #We don't care what the scores are + # We don't care what the scores are seq_format = "qual" else: - sys_exit("Unrecognised file format %r" % seq_format) + sys.exit("Unrecognised file format %r" % seq_format) try: from Bio import SeqIO except ImportError: - sys_exit("Biopython 1.54 or later is required") + sys.exit("Biopython 1.54 or later is required") def parse_ids(tabular_file, col): @@ -84,25 +80,25 @@ if warn: sys.stderr.write(warn) -#Index the sequence file. -#If very big, could use SeqIO.index_db() to avoid memory bottleneck... +# Index the sequence file. +# If very big, could use SeqIO.index_db() to avoid memory bottleneck... records = SeqIO.index(in_file, seq_format) print "Indexed %i sequences" % len(records) -if seq_format.lower()=="sff": - #Special case to try to preserve the XML manifest +if seq_format.lower() == "sff": + # Special case to try to preserve the XML manifest try: - from Bio.SeqIO.SffIO import SffIterator, SffWriter + from Bio.SeqIO.SffIO import SffWriter except ImportError: - sys_exit("Requires Biopython 1.54 or later") + sys.exit("Requires Biopython 1.54 or later") try: from Bio.SeqIO.SffIO import ReadRocheXmlManifest except ImportError: - #Prior to Biopython 1.56 this was a private function + # Prior to Biopython 1.56 this was a private function from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest - in_handle = open(in_file, "rb") #must be binary mode! + in_handle = open(in_file, "rb") # must be binary mode! try: manifest = ReadRocheXmlManifest(in_handle) except ValueError: @@ -112,21 +108,21 @@ out_handle = open(out_file, "wb") writer = SffWriter(out_handle, xml=manifest) count = 0 - #This does have the overhead of parsing into SeqRecord objects, - #but doing the header and index at the low level is too fidly. + # This does have the overhead of parsing into SeqRecord objects, + # but doing the header and index at the low level is too fidly. iterator = (records[name] for name in parse_ids(tabular_file, column)) try: count = writer.write_file(iterator) except KeyError, err: out_handle.close() if name not in records: - sys_exit("Identifier %r not found in sequence file" % name) + sys.exit("Identifier %r not found in sequence file" % name) else: raise err out_handle.close() else: - #Avoid overhead of parsing into SeqRecord objects, - #just re-use the original formatting from the input file. + # Avoid overhead of parsing into SeqRecord objects, + # just re-use the original formatting from the input file. out_handle = open(out_file, "w") count = 0 for name in parse_ids(tabular_file, column): @@ -134,7 +130,7 @@ out_handle.write(records.get_raw(name)) except KeyError: out_handle.close() - sys_exit("Identifier %r not found in sequence file" % name) + sys.exit("Identifier %r not found in sequence file" % name) count += 1 out_handle.close()
--- a/tools/seq_select_by_id/seq_select_by_id.xml Thu May 21 10:55:50 2015 -0400 +++ b/tools/seq_select_by_id/seq_select_by_id.xml Wed Feb 01 10:51:12 2017 -0500 @@ -1,4 +1,4 @@ -<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.11"> +<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.12"> <description>from a tabular file</description> <requirements> <requirement type="package" version="1.62">biopython</requirement>
--- a/tools/seq_select_by_id/tool_dependencies.xml Thu May 21 10:55:50 2015 -0400 +++ b/tools/seq_select_by_id/tool_dependencies.xml Wed Feb 01 10:51:12 2017 -0500 @@ -1,6 +1,6 @@ <?xml version="1.0"?> <tool_dependency> <package name="biopython" version="1.62"> - <repository changeset_revision="ac9cc2992b69" name="package_biopython_1_62" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="76434ffd927d" name="package_biopython_1_62" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> </tool_dependency>