Mercurial > repos > peterjc > seq_select_by_id

--- a/tools/seq_select_by_id/README.rst	Thu May 21 10:55:50 2015 -0400
+++ b/tools/seq_select_by_id/README.rst	Wed Feb 01 10:51:12 2017 -0500
@@ -1,7 +1,7 @@
 Galaxy tool to select FASTA, QUAL, FASTQ or SFF sequences by ID
 ===============================================================

-This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute
+This tool is copyright 2011-2017 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.

@@ -85,6 +85,7 @@
         - Reorder XML elements (internal change only).
         - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
         - Quote filenames in case of spaces (internal change only).
+v0.0.12 - Python style changes (internal change only).
 ======= ======================================================================


@@ -101,12 +102,12 @@
 Planemo commands (which requires you have set your Tool Shed access details in
 ``~/.planemo.yml`` and that you have access rights on the Tool Shed)::

-    $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/
+    $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/
     ...

 or::

-    $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/
+    $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_select_by_id/
     ...

 To just build and check the tar ball, use::
--- a/tools/seq_select_by_id/seq_select_by_id.py	Thu May 21 10:55:50 2015 -0400
+++ b/tools/seq_select_by_id/seq_select_by_id.py	Wed Feb 01 10:51:12 2017 -0500
@@ -16,51 +16,47 @@
 molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
 http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.

-This script is copyright 2011-2013 by Peter Cock, The James Hutton Institute UK.
+This script is copyright 2011-2017 by Peter Cock, The James Hutton Institute UK.
 All rights reserved. See accompanying text file for licence details (MIT
 license).
 """
 import sys

-def sys_exit(msg, err=1):
-    sys.stderr.write(msg.rstrip() + "\n")
-    sys.exit(err)
-
 if "-v" in sys.argv or "--version" in sys.argv:
-    print "v0.0.9"
+    print "v0.0.12"
     sys.exit(0)

-#Parse Command Line
+# Parse Command Line
 try:
     tabular_file, col_arg, in_file, seq_format, out_file = sys.argv[1:]
 except ValueError:
-    sys_exit("Expected five arguments, got %i:\n%s" % (len(sys.argv)-1, " ".join(sys.argv)))
+    sys.exit("Expected five arguments, got %i:\n%s" % (len(sys.argv) - 1, " ".join(sys.argv)))
 try:
     if col_arg.startswith("c"):
-        column = int(col_arg[1:])-1
+        column = int(col_arg[1:]) - 1
     else:
-        column = int(col_arg)-1
+        column = int(col_arg) - 1
 except ValueError:
-    sys_exit("Expected column number, got %s" % col_arg)
+    sys.exit("Expected column number, got %s" % col_arg)

 if seq_format == "fastqcssanger":
-    sys_exit("Colorspace FASTQ not supported.")
+    sys.exit("Colorspace FASTQ not supported.")
 elif seq_format.lower() in ["sff", "fastq", "qual", "fasta"]:
     seq_format = seq_format.lower()
 elif seq_format.lower().startswith("fastq"):
-    #We don't care how the qualities are encoded
+    # We don't care how the qualities are encoded
     seq_format = "fastq"
 elif seq_format.lower().startswith("qual"):
-    #We don't care what the scores are
+    # We don't care what the scores are
     seq_format = "qual"
 else:
-    sys_exit("Unrecognised file format %r" % seq_format)
+    sys.exit("Unrecognised file format %r" % seq_format)


 try:
     from Bio import SeqIO
 except ImportError:
-    sys_exit("Biopython 1.54 or later is required")
+    sys.exit("Biopython 1.54 or later is required")


 def parse_ids(tabular_file, col):
@@ -84,25 +80,25 @@
     if warn:
         sys.stderr.write(warn)

-#Index the sequence file.
-#If very big, could use SeqIO.index_db() to avoid memory bottleneck...
+# Index the sequence file.
+# If very big, could use SeqIO.index_db() to avoid memory bottleneck...
 records = SeqIO.index(in_file, seq_format)
 print "Indexed %i sequences" % len(records)

-if seq_format.lower()=="sff":
-    #Special case to try to preserve the XML manifest
+if seq_format.lower() == "sff":
+    # Special case to try to preserve the XML manifest
     try:
-        from Bio.SeqIO.SffIO import SffIterator, SffWriter
+        from Bio.SeqIO.SffIO import SffWriter
     except ImportError:
-        sys_exit("Requires Biopython 1.54 or later")
+        sys.exit("Requires Biopython 1.54 or later")

     try:
         from Bio.SeqIO.SffIO import ReadRocheXmlManifest
     except ImportError:
-        #Prior to Biopython 1.56 this was a private function
+        # Prior to Biopython 1.56 this was a private function
         from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest

-    in_handle = open(in_file, "rb") #must be binary mode!
+    in_handle = open(in_file, "rb")  # must be binary mode!
     try:
         manifest = ReadRocheXmlManifest(in_handle)
     except ValueError:
@@ -112,21 +108,21 @@
     out_handle = open(out_file, "wb")
     writer = SffWriter(out_handle, xml=manifest)
     count = 0
-    #This does have the overhead of parsing into SeqRecord objects,
-    #but doing the header and index at the low level is too fidly.
+    # This does have the overhead of parsing into SeqRecord objects,
+    # but doing the header and index at the low level is too fiddly.
     iterator = (records[name] for name in parse_ids(tabular_file, column))
     try:
         count = writer.write_file(iterator)
     except KeyError, err:
         out_handle.close()
         if name not in records:
-            sys_exit("Identifier %r not found in sequence file" % name)
+            sys.exit("Identifier %r not found in sequence file" % name)
         else:
             raise err
     out_handle.close()
 else:
-    #Avoid overhead of parsing into SeqRecord objects,
-    #just re-use the original formatting from the input file.
+    # Avoid overhead of parsing into SeqRecord objects,
+    # just re-use the original formatting from the input file.
     out_handle = open(out_file, "w")
     count = 0
     for name in parse_ids(tabular_file, column):
@@ -134,7 +130,7 @@
             out_handle.write(records.get_raw(name))
         except KeyError:
             out_handle.close()
-            sys_exit("Identifier %r not found in sequence file" % name)
+            sys.exit("Identifier %r not found in sequence file" % name)
         count += 1
     out_handle.close()
--- a/tools/seq_select_by_id/seq_select_by_id.xml	Thu May 21 10:55:50 2015 -0400
+++ b/tools/seq_select_by_id/seq_select_by_id.xml	Wed Feb 01 10:51:12 2017 -0500
@@ -1,4 +1,4 @@
-<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.11">
+<tool id="seq_select_by_id" name="Select sequences by ID" version="0.0.12">
     <description>from a tabular file</description>
     <requirements>
         <requirement type="package" version="1.62">biopython</requirement>
--- a/tools/seq_select_by_id/tool_dependencies.xml	Thu May 21 10:55:50 2015 -0400
+++ b/tools/seq_select_by_id/tool_dependencies.xml	Wed Feb 01 10:51:12 2017 -0500
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="biopython" version="1.62">
-        <repository changeset_revision="ac9cc2992b69" name="package_biopython_1_62" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="76434ffd927d" name="package_biopython_1_62" owner="biopython" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>