changeset 15:310ec0f47485 draft

planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 96d587fc6f6cab23c597e88a83daf7eecd0d4162-dirty
author peterjc
date Thu, 09 Feb 2017 11:16:00 -0500
parents 623a3fbe5340
children 63befb860c3e
files blast.py
diffstat 1 files changed, 59 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/blast.py	Fri Feb 03 12:34:03 2017 -0500
+++ b/blast.py	Thu Feb 09 11:16:00 2017 -0500
@@ -7,12 +7,17 @@
 import os
 from time import sleep
 
-from galaxy.datatypes.data import get_file_peek
-from galaxy.datatypes.data import Data, Text
+from galaxy.datatypes.data import Data, Text, get_file_peek
 from galaxy.datatypes.xml import GenericXml
 
 log = logging.getLogger(__name__)
 
+# Note implicit string concatenation here to avoid excessively long lines:
+_DOCTYPES = ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
+             '"http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
+             '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" '
+             '"NCBI_BlastOutput.dtd">']
+
 
 class BlastXml(GenericXml):
     """NCBI Blast XML Output data"""
@@ -48,8 +53,7 @@
             handle.close()
             return False
         line = handle.readline()
-        if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
-                                '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
+        if line.strip() not in _DOCTYPES:
             handle.close()
             return False
         line = handle.readline()
@@ -96,8 +100,7 @@
                 raise ValueError("%s is not an XML file!" % f)
             line = h.readline()
             header += line
-            if line.strip() not in ['<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
-                                    '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">']:
+            if line.strip() not in _DOCTYPES:
                 out.write(header)  # for diagnosis
                 out.close()
                 h.close()
@@ -220,21 +223,50 @@
         self.add_composite_file('blastdb.nhr', is_binary=True)  # sequence headers
         self.add_composite_file('blastdb.nin', is_binary=True)  # index file
         self.add_composite_file('blastdb.nsq', is_binary=True)  # nucleotide sequences
-        self.add_composite_file('blastdb.nal', is_binary=False, optional=True)  # alias ( -gi_mask option of makeblastdb)
-        self.add_composite_file('blastdb.nhd', is_binary=True, optional=True)  # sorted sequence hash values ( -hash_index option of makeblastdb)
-        self.add_composite_file('blastdb.nhi', is_binary=True, optional=True)  # index of sequence hash values ( -hash_index option of makeblastdb)
-        self.add_composite_file('blastdb.nnd', is_binary=True, optional=True)  # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
-        self.add_composite_file('blastdb.nni', is_binary=True, optional=True)  # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
-        self.add_composite_file('blastdb.nog', is_binary=True, optional=True)  # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
-        self.add_composite_file('blastdb.nsd', is_binary=True, optional=True)  # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
-        self.add_composite_file('blastdb.nsi', is_binary=True, optional=True)  # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
-#        self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True)  # first volume of the MegaBLAST index generated by makembindex
-# The previous line should be repeated for each index volume, with filename extensions like '.01.idx', '.02.idx', etc.
-        self.add_composite_file('blastdb.shd', is_binary=True, optional=True)  # MegaBLAST index superheader (-old_style_index false option of makembindex)
-#        self.add_composite_file('blastdb.naa', is_binary=True, optional=True)  # index of a WriteDB column for e.g. mask data
-#        self.add_composite_file('blastdb.nab', is_binary=True, optional=True)  # data of a WriteDB column
-#        self.add_composite_file('blastdb.nac', is_binary=True, optional=True)  # multiple byte order for a WriteDB column
-# The previous 3 lines should be repeated for each WriteDB column, with filename extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
+
+        # alias ( -gi_mask option of makeblastdb)
+        self.add_composite_file('blastdb.nal', is_binary=False, optional=True)
+
+        # sorted sequence hash values ( -hash_index option of makeblastdb)
+        self.add_composite_file('blastdb.nhd', is_binary=True, optional=True)
+
+        # index of sequence hash values ( -hash_index option of makeblastdb)
+        self.add_composite_file('blastdb.nhi', is_binary=True, optional=True)
+
+        # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
+        self.add_composite_file('blastdb.nnd', is_binary=True, optional=True)
+
+        # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
+        self.add_composite_file('blastdb.nni', is_binary=True, optional=True)
+
+        # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
+        self.add_composite_file('blastdb.nog', is_binary=True, optional=True)
+
+        # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
+        self.add_composite_file('blastdb.nsd', is_binary=True, optional=True)
+
+        # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
+        self.add_composite_file('blastdb.nsi', is_binary=True, optional=True)
+
+        # first volume of the MegaBLAST index generated by makembindex
+        # self.add_composite_file('blastdb.00.idx', is_binary=True, optional=True)
+        # The previous line should be repeated for each index volume, with filename
+        # extensions like '.01.idx', '.02.idx', etc.
+
+        # MegaBLAST index superheader (-old_style_index false option of makembindex)
+        # self.add_composite_file('blastdb.shd', is_binary=True, optional=True)
+
+        # index of a WriteDB column for e.g. mask data
+        # self.add_composite_file('blastdb.naa', is_binary=True, optional=True)
+
+        # data of a WriteDB column
+        # self.add_composite_file('blastdb.nab', is_binary=True, optional=True)
+
+        # multiple byte order for a WriteDB column
+        # self.add_composite_file('blastdb.nac', is_binary=True, optional=True)
+
+        # The previous 3 lines should be repeated for each WriteDB column, with filename
+        # extensions like ('.nba', '.nbb', '.nbc'), ('.nca', '.ncb', '.ncc'), etc.
 
 
 class BlastProtDb(_BlastDb, Data):
@@ -245,7 +277,7 @@
 
     def __init__(self, **kwd):
         Data.__init__(self, **kwd)
-# Component file comments are as in BlastNucDb except where noted
+        # Component file comments are as in BlastNucDb except where noted
         self.add_composite_file('blastdb.phr', is_binary=True)
         self.add_composite_file('blastdb.pin', is_binary=True)
         self.add_composite_file('blastdb.psq', is_binary=True)  # protein sequences
@@ -256,10 +288,11 @@
         self.add_composite_file('blastdb.pog', is_binary=True, optional=True)
         self.add_composite_file('blastdb.psd', is_binary=True, optional=True)
         self.add_composite_file('blastdb.psi', is_binary=True, optional=True)
-#        self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
-#        self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
-#        self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
-# The last 3 lines should be repeated for each WriteDB column, with filename extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
+        # self.add_composite_file('blastdb.paa', is_binary=True, optional=True)
+        # self.add_composite_file('blastdb.pab', is_binary=True, optional=True)
+        # self.add_composite_file('blastdb.pac', is_binary=True, optional=True)
+        # The last 3 lines should be repeated for each WriteDB column, with filename
+        # extensions like ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
 
 
 class BlastDomainDb(_BlastDb, Data):