changeset 18:1250aab8b97a draft

planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 960f4708be7cdd486e4569e7b44eb856b2cad79d-dirty
author peterjc
date Fri, 22 Feb 2019 09:54:46 -0500
parents 3eada762af11
children db2480de852e
files blast.py
diffstat 1 files changed, 90 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/blast.py	Tue Oct 23 06:24:33 2018 -0400
+++ b/blast.py	Fri Feb 22 09:54:46 2019 -0500
@@ -13,10 +13,12 @@
 log = logging.getLogger(__name__)
 
 # Note implicit string concatenation here to avoid excessively long lines:
-_DOCTYPES = ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
-             '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
-             '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
-             '"NCBI_BlastOutput.dtd">']
+_DOCTYPES = [
+    '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
+    '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
+    '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
+    '"NCBI_BlastOutput.dtd">',
+]
 
 
 class BlastXml(GenericXml):
@@ -28,10 +30,10 @@
         """Set the peek and blurb text."""
         if not dataset.dataset.purged:
             dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
-            dataset.blurb = 'NCBI Blast XML data'
+            dataset.blurb = "NCBI Blast XML data"
         else:
-            dataset.peek = 'file does not exist'
-            dataset.blurb = 'file purged from disk'
+            dataset.peek = "file does not exist"
+            dataset.blurb = "file purged from disk"
 
     def sniff(self, filename):
         """Determine from the contents if the file is blastxml.
@@ -58,7 +60,7 @@
             handle.close()
             return False
         line = handle.readline()
-        if line.strip() != '<BlastOutput>':
+        if line.strip() != "<BlastOutput>":
             handle.close()
             return False
         handle.close()
@@ -74,8 +76,10 @@
             # For one file only, use base class method (move/copy)
             return Text.merge(split_files, output_file)
         if not split_files:
-            raise ValueError("Given no BLAST XML files, %r, to merge into %s"
-                             % (split_files, output_file))
+            raise ValueError(
+                "Given no BLAST XML files, %r, to merge into %s"
+                % (split_files, output_file)
+            )
         out = open(output_file, "w")
         h = None
         for f in split_files:
@@ -138,20 +142,25 @@
                 # Enough to check <BlastOutput_program> and <BlastOutput_version> match
                 out.close()
                 h.close()
-                raise ValueError("BLAST XML headers don't match for %s and %s - have:\n%s\n...\n\nAnd:\n%s\n...\n"
-                                 % (split_files[0], f, old_header[:300], header[:300]))
+                raise ValueError(
+                    "BLAST XML headers don't match for %s and %s - have:\n"
+                    "%s\n...\n\nAnd:\n%s\n...\n"
+                    % (split_files[0], f, old_header[:300], header[:300])
+                )
             else:
                 out.write("    <Iteration>\n")
             for line in h:
                 if "</BlastOutput_iterations>" in line:
                     break
-                # TODO - Increment <Iteration_iter-num> and if required automatic query names
-                # like <Iteration_query-ID>Query_3</Iteration_query-ID> to be increasing?
+                # TODO - Increment <Iteration_iter-num> and if required automatic query
+                # names like <Iteration_query-ID>Query_3</Iteration_query-ID> to be
+                # increasing?
                 out.write(line)
             h.close()
         out.write("  </BlastOutput_iterations>\n")
         out.write("</BlastOutput>\n")
         out.close()
+
     merge = staticmethod(merge)
 
 
@@ -164,8 +173,8 @@
             dataset.peek = "BLAST database (multiple files)"
             dataset.blurb = "BLAST database (multiple files)"
         else:
-            dataset.peek = 'file does not exist'
-            dataset.blurb = 'file purged from disk'
+            dataset.peek = "file does not exist"
+            dataset.blurb = "file purged from disk"
 
     def display_peek(self, dataset):
         """Create HTML content, used for displaying peek."""
@@ -174,16 +183,26 @@
         except Exception:
             return "BLAST database (multiple files)"
 
-    def display_data(self, trans, data, preview=False, filename=None,
-                     to_ext=None, size=None, offset=None, **kwd):
+    def display_data(
+        self,
+        trans,
+        data,
+        preview=False,
+        filename=None,
+        to_ext=None,
+        size=None,
+        offset=None,
+        **kwd
+    ):
         """Documented as an old display method, but still gets called via tests etc.
 
         This allows us to format the data shown in the central pane via the "eye" icon.
         """
         if filename is not None and filename != "index":
             # Change nothing - important for the unit tests to access child files:
-            return Data.display_data(self, trans, data, preview, filename,
-                                     to_ext, size, offset, **kwd)
+            return Data.display_data(
+                self, trans, data, preview, filename, to_ext, size, offset, **kwd
+            )
         if self.file_ext == "blastdbn":
             title = "This is a nucleotide BLAST database"
         elif self.file_ext == "blastdbp":
@@ -204,11 +223,16 @@
         if not msg:
             msg = title
         # Galaxy assumes HTML for the display of composite datatypes,
-        return "<html><head><title>%s</title></head><body><pre>%s</pre></body></html>" % (title, msg)
+        return (
+            "<html><head><title>%s</title></head><body><pre>%s</pre></body></html>"
+            % (title, msg)
+        )
 
     def merge(split_files, output_file):
         """Merge BLAST databases (not implemented for now)."""
-        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")
+        raise NotImplementedError(
+            "Merging BLAST databases is non-trivial (do this via makeblastdb?)"
+        )
 
     def split(cls, input_datasets, subdir_generator_function, split_params):
         """Split a BLAST database (not implemented for now)."""
@@ -220,40 +244,44 @@
 class BlastNucDb(_BlastDb, Data):
     """Class for nucleotide BLAST database files."""
 
-    file_ext = 'blastdbn'
+    file_ext = "blastdbn"
     allow_datatype_change = False
-    composite_type = 'basic'
+    composite_type = "basic"
 
     def __init__(self, **kwd):
         """Initialize the class."""
         Data.__init__(self, **kwd)
-        self.add_composite_file('blastdb.nhr', is_binary=True)  # sequence headers
-        self.add_composite_file('blastdb.nin', is_binary=True)  # index file
-        self.add_composite_file('blastdb.nsq', is_binary=True)  # nucleotide sequences
+        self.add_composite_file("blastdb.nhr", is_binary=True)  # sequence headers
+        self.add_composite_file("blastdb.nin", is_binary=True)  # index file
+        self.add_composite_file("blastdb.nsq", is_binary=True)  # nucleotide sequences
 
         # alias ( -gi_mask option of makeblastdb)
-        self.add_composite_file('blastdb.nal', is_binary=False, optional=True)
+        self.add_composite_file("blastdb.nal", is_binary=False, optional=True)
 
         # sorted sequence hash values ( -hash_index option of makeblastdb)
-        self.add_composite_file('blastdb.nhd', is_binary=True, optional=True)
+        self.add_composite_file("blastdb.nhd", is_binary=True, optional=True)
 
         # index of sequence hash values ( -hash_index option of makeblastdb)
-        self.add_composite_file('blastdb.nhi', is_binary=True, optional=True)
+        self.add_composite_file("blastdb.nhi", is_binary=True, optional=True)
 
-        # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
-        self.add_composite_file('blastdb.nnd', is_binary=True, optional=True)
+        # sorted GI values
+        # ( -parse_seqids option of makeblastdb and gi present in the description lines)
+        self.add_composite_file("blastdb.nnd", is_binary=True, optional=True)
 
-        # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
-        self.add_composite_file('blastdb.nni', is_binary=True, optional=True)
+        # index of GI values
+        # ( -parse_seqids option of makeblastdb and gi present in the description lines)
+        self.add_composite_file("blastdb.nni", is_binary=True, optional=True)
 
         # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
-        self.add_composite_file('blastdb.nog', is_binary=True, optional=True)
+        self.add_composite_file("blastdb.nog", is_binary=True, optional=True)
 
-        # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
-        self.add_composite_file('blastdb.nsd', is_binary=True, optional=True)
+        # sorted sequence accession values
+        # ( -hash_index or -parse_seqids option of makeblastdb)
+        self.add_composite_file("blastdb.nsd", is_binary=True, optional=True)
 
-        # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
-        self.add_composite_file('blastdb.nsi', is_binary=True, optional=True)
+        # index of sequence accession values
+        # ( -hash_index or -parse_seqids option of makeblastdb)
+        self.add_composite_file("blastdb.nsi", is_binary=True, optional=True)
 
         # first volume of the MegaBLAST index generated by makembindex
         # self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True)
@@ -279,24 +307,24 @@
 class BlastProtDb(_BlastDb, Data):
     """Class for protein BLAST database files."""
 
-    file_ext = 'blastdbp'
+    file_ext = "blastdbp"
     allow_datatype_change = False
-    composite_type = 'basic'
+    composite_type = "basic"
 
     def __init__(self, **kwd):
         """Initialize the class."""
         Data.__init__(self, **kwd)
         # Component file comments are as in BlastNucDb except where noted
-        self.add_composite_file('blastdb.phr', is_binary=True)
-        self.add_composite_file('blastdb.pin', is_binary=True)
-        self.add_composite_file('blastdb.psq', is_binary=True)  # protein sequences
-        self.add_composite_file('blastdb.phd', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.phi', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.pnd', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.pni', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.pog', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
+        self.add_composite_file("blastdb.phr", is_binary=True)
+        self.add_composite_file("blastdb.pin", is_binary=True)
+        self.add_composite_file("blastdb.psq", is_binary=True)  # protein sequences
+        self.add_composite_file("blastdb.phd", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.phi", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.pnd", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.pni", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.pog", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.psd", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.psi", is_binary=True, optional=True)
         # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
         # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
         # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
@@ -307,19 +335,19 @@
 class BlastDomainDb(_BlastDb, Data):
     """Class for domain BLAST database files."""
 
-    file_ext = 'blastdbd'
+    file_ext = "blastdbd"
     allow_datatype_change = False
-    composite_type = 'basic'
+    composite_type = "basic"
 
     def __init__(self, **kwd):
         """Initialize the class."""
         Data.__init__(self, **kwd)
-        self.add_composite_file('blastdb.phr', is_binary=True)
-        self.add_composite_file('blastdb.pin', is_binary=True)
-        self.add_composite_file('blastdb.psq', is_binary=True)
-        self.add_composite_file('blastdb.freq', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.loo', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.rps', is_binary=True, optional=True)
-        self.add_composite_file('blastdb.aux', is_binary=True, optional=True)
+        self.add_composite_file("blastdb.phr", is_binary=True)
+        self.add_composite_file("blastdb.pin", is_binary=True)
+        self.add_composite_file("blastdb.psq", is_binary=True)
+        self.add_composite_file("blastdb.freq", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.loo", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.psd", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.psi", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.rps", is_binary=True, optional=True)
+        self.add_composite_file("blastdb.aux", is_binary=True, optional=True)