changeset 25:c32600ce6c9a draft

planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/tools/blastxml_to_top_descr commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
author peterjc
date Fri, 22 Feb 2019 09:56:23 -0500
parents c07b61d069ac
children a2e7cb2e0a30
files tools/blastxml_to_top_descr/blastxml_to_top_descr.py tools/blastxml_to_top_descr/repository_dependencies.xml
diffstat 2 files changed, 67 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/tools/blastxml_to_top_descr/blastxml_to_top_descr.py	Fri Nov 09 11:59:43 2018 -0500
+++ b/tools/blastxml_to_top_descr/blastxml_to_top_descr.py	Fri Feb 22 09:56:23 2019 -0500
@@ -24,6 +24,7 @@
 else:
     from galaxy import eggs  # noqa - ignore flake8 F401
     import pkg_resources
+
     pkg_resources.require("elementtree")
     from elementtree import ElementTree
 
@@ -41,23 +42,54 @@
 """
 
 parser = OptionParser(usage=usage)
-parser.add_option("-t", "--topN", dest="topN", default=3,
-                  help="Number of descriptions to collect (in order from file)")
-parser.add_option("-o", "--output", dest="out_file", default=None,
-                  help="Output filename for tabular file",
-                  metavar="FILE")
-parser.add_option("-f", "--format", dest="format", default="blastxml",
-                  help="Input format (blastxml or tabular)")
-parser.add_option("-q", "--qseqid", dest="qseqid", default="1",
-                  help="Column for query 'qseqid' (for tabular input; default 1)")
-parser.add_option("-s", "--sseqid", dest="sseqid", default="2",
-                  help="Column for subject 'sseqid' (for tabular input; default 2)")
-parser.add_option("-d", "--salltitles", dest="salltitles", default="25",
-                  help="Column for descriptions 'salltitles' (for tabular input; default 25)")
+parser.add_option(
+    "-t",
+    "--topN",
+    dest="topN",
+    default=3,
+    help="Number of descriptions to collect (in order from file)",
+)
+parser.add_option(
+    "-o",
+    "--output",
+    dest="out_file",
+    default=None,
+    help="Output filename for tabular file",
+    metavar="FILE",
+)
+parser.add_option(
+    "-f",
+    "--format",
+    dest="format",
+    default="blastxml",
+    help="Input format (blastxml or tabular)",
+)
+parser.add_option(
+    "-q",
+    "--qseqid",
+    dest="qseqid",
+    default="1",
+    help="Column for query 'qseqid' (for tabular input; default 1)",
+)
+parser.add_option(
+    "-s",
+    "--sseqid",
+    dest="sseqid",
+    default="2",
+    help="Column for subject 'sseqid' (for tabular input; default 2)",
+)
+parser.add_option(
+    "-d",
+    "--salltitles",
+    dest="salltitles",
+    default="25",
+    help="Column for descriptions 'salltitles' (for tabular input; default 25)",
+)
 (options, args) = parser.parse_args()
 
 if len(sys.argv) == 4 and len(args) == 3 and not options.out_file:
-    sys.exit("""The API has changed, replace this:
+    sys.exit(
+        """The API has changed, replace this:
 
 $ python blastxml_to_top_descr.py input.xml output.tab 3
 
@@ -66,7 +98,8 @@
 $ python blastxml_to_top_descr.py -o output.tab -t 3 input.xml
 
 Sorry.
-""")
+"""
+    )
 
 if not args:
     sys.exit("Input filename missing, try -h")
@@ -140,7 +173,9 @@
     except Exception:
         with open(in_file) as handle:
             header = handle.read(100)
-        sys.exit("Invalid data format in XML file %r which starts: %r" % (in_file, header))
+        sys.exit(
+            "Invalid data format in XML file %r which starts: %r" % (in_file, header)
+        )
     # turn it into an iterator
     context = iter(context)
     # get the root element
@@ -149,7 +184,10 @@
     except Exception:
         with open(in_file) as handle:
             header = handle.read(100)
-        sys.exit("Unable to get root element from XML file %r which starts: %r" % (in_file, header))
+        sys.exit(
+            "Unable to get root element from XML file %r which starts: %r"
+            % (in_file, header)
+        )
 
     re_default_query_id = re.compile(r"^Query_\d+$")
     assert re_default_query_id.match(r"Query_101")
@@ -180,7 +218,9 @@
             # <Iteration_hits>...
             qseqid = elem.findtext("Iteration_query-ID")
             if qseqid is None:
-                sys.exit("Missing <Iteration_query-ID> (could be really old BLAST XML data?)")
+                sys.exit(
+                    "Missing <Iteration_query-ID> (could be really old BLAST XML data?)"
+                )
             if re_default_query_id.match(qseqid):
                 # Place holder ID, take the first word of the query definition
                 qseqid = elem.findtext("Iteration_query-def").split(None, 1)[0]
@@ -206,13 +246,16 @@
                 # <Hit_accession>P56514</Hit_accession>
                 # or,
                 # <Hit_id>Subject_1</Hit_id>
-                # <Hit_def>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]</Hit_def>
+                # <Hit_def>gi|57163783|ref|NP_001009242.1|
+                # rhodopsin [Felis catus]</Hit_def>
                 # <Hit_accession>Subject_1</Hit_accession>
                 #
                 # apparently depending on the parse_deflines switch
                 sseqid = hit.findtext("Hit_id").split(None, 1)[0]
                 hit_def = sseqid + " " + hit.findtext("Hit_def")
-                if re_default_subject_id.match(sseqid) and sseqid == hit.findtext("Hit_accession"):
+                if re_default_subject_id.match(sseqid) and sseqid == hit.findtext(
+                    "Hit_accession"
+                ):
                     # Place holder ID, take the first word of the subject definition
                     hit_def = hit.findtext("Hit_def")
                     sseqid = hit_def.split(None, 1)[0]
@@ -249,7 +292,7 @@
 if out_file is None:
     outfile = sys.stdout
 else:
-    outfile = open(out_file, 'w')
+    outfile = open(out_file, "w")
 outfile.write("#Query\t%s\n" % "\t".join("BLAST hit %i" % (i + 1) for i in range(topN)))
 for query, descrs in hits:
     count += 1
--- a/tools/blastxml_to_top_descr/repository_dependencies.xml	Fri Nov 09 11:59:43 2018 -0500
+++ b/tools/blastxml_to_top_descr/repository_dependencies.xml	Fri Feb 22 09:56:23 2019 -0500
@@ -1,4 +1,4 @@
-<?xml version="1.0"?>
+<?xml version="1.0" ?>
 <repositories description="Requires BLAST XML and database datatype definitions.">
-<repository changeset_revision="3eada762af11" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" />
-</repositories>
+    <repository changeset_revision="1250aab8b97a" name="blast_datatypes" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu"/>
+</repositories>
\ No newline at end of file