Mercurial > repos > devteam > blast_datatypes
annotate blast.py @ 14:623a3fbe5340 draft
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
author | peterjc |
---|---|
date | Fri, 03 Feb 2017 12:34:03 -0500 |
parents | da92fef90117 |
children | 310ec0f47485 |
rev | line source |
---|---|
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
1 """NCBI BLAST datatypes. |
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
2 |
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
3 Covers the ``blastxml`` format and the BLAST databases. |
3 | 4 """ |
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
5 |
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
6 import logging |
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
7 import os |
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
8 from time import sleep |
3 | 9 |
10 from galaxy.datatypes.data import get_file_peek | |
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
11 from galaxy.datatypes.data import Data, Text |
3 | 12 from galaxy.datatypes.xml import GenericXml |
9
e36c60d13c94
Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents:
7
diff
changeset
|
13 |
e36c60d13c94
Uploaded v0.0.18, tweak display_data for running tests
peterjc
parents:
7
diff
changeset
|
14 log = logging.getLogger(__name__) |
5
b3a3ba0c1d47
Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents:
4
diff
changeset
|
15 |
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
16 |
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
class BlastXml(GenericXml):
    """NCBI Blast XML Output data"""
    file_ext = "blastxml"

    # First line expected at the top of every BLAST XML file.
    _XML_DECLARATION = '<?xml version="1.0"?>'
    # Accepted second lines: the DOCTYPE with either the full DTD URL or
    # just the bare DTD file name.
    _DOCTYPE_LINES = (
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">',
        '<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "NCBI_BlastOutput.dtd">',
    )

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'NCBI Blast XML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """Determines whether the file is blastxml

        Only the first three lines are inspected: the XML declaration, the
        BlastOutput DOCTYPE and the opening ``<BlastOutput>`` tag.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('megablast_xml_parser_test1.blastxml')
        >>> BlastXml().sniff(fname)
        True
        >>> fname = get_test_fname('tblastn_four_human_vs_rhodopsin.xml')
        >>> BlastXml().sniff(fname)
        True
        >>> fname = get_test_fname('interval.interval')
        >>> BlastXml().sniff(fname)
        False
        """
        # The context manager closes the handle on every return path.
        with open(filename) as handle:
            if handle.readline().strip() != BlastXml._XML_DECLARATION:
                return False
            if handle.readline().strip() not in BlastXml._DOCTYPE_LINES:
                return False
            return handle.readline().strip() == '<BlastOutput>'

    @staticmethod
    def merge(split_files, output_file):
        """Merge several BLAST XML files into a single BLAST XML file.

        With exactly one input the work is delegated to ``Text.merge``.
        Otherwise the header (everything up to and including the first
        ``<Iteration>`` line) of the first file is written once, the
        iterations of every file are appended in order, and the closing
        tags are written once at the end.

        split_files - list of BLAST XML file names to combine.
        output_file - name of the merged file to write.

        Raises ValueError if no files are given, if a file is missing or
        empty (after one retry a second later), if a file does not look
        like BLAST XML, or if the first 300 characters of a file's header
        differ from those of the first file.  For several of these failures
        the offending header is written to ``output_file`` for diagnosis
        before the exception is raised.
        """
        if len(split_files) == 1:
            # For one file only, use base class method (move/copy)
            return Text.merge(split_files, output_file)
        if not split_files:
            raise ValueError("Given no BLAST XML files, %r, to merge into %s"
                             % (split_files, output_file))
        # The with-block guarantees the output is flushed and closed on
        # success and on every error path.
        with open(output_file, "w") as out:
            old_header = None
            # Use the position, not the file name, to spot the first file, so
            # a path listed twice cannot cause the header to be written twice.
            for index, f in enumerate(split_files):
                if not os.path.isfile(f):
                    log.warning("BLAST XML file %s missing, retry in 1s...", f)
                    sleep(1)
                    if not os.path.isfile(f):
                        log.error("BLAST XML file %s missing", f)
                        raise ValueError("BLAST XML file %s missing" % f)
                h = open(f)
                try:
                    header = h.readline()
                    if not header:
                        # Only the input handle is closed here; the output
                        # must stay open so a successful retry can still be
                        # merged.
                        h.close()
                        # Retry, could be transient error with networked file system...
                        log.warning("BLAST XML file %s empty, retry in 1s...", f)
                        sleep(1)
                        h = open(f)
                        header = h.readline()
                        if not header:
                            log.error("BLAST XML file %s was empty", f)
                            raise ValueError("BLAST XML file %s was empty" % f)
                    if header.strip() != BlastXml._XML_DECLARATION:
                        out.write(header)  # for diagnosis
                        raise ValueError("%s is not an XML file!" % f)
                    line = h.readline()
                    header += line
                    if line.strip() not in BlastXml._DOCTYPE_LINES:
                        out.write(header)  # for diagnosis
                        raise ValueError("%s is not a BLAST XML file!" % f)
                    # Accumulate the header up to and including the first
                    # <Iteration> line.
                    while True:
                        line = h.readline()
                        if not line:
                            out.write(header)  # for diagnosis
                            raise ValueError("BLAST XML file %s ended prematurely" % f)
                        header += line
                        if "<Iteration>" in line:
                            break
                        if len(header) > 10000:
                            # Something has gone wrong, don't load too much into memory!
                            # Write what we have to the merged file for diagnostics
                            out.write(header)
                            raise ValueError("The header in BLAST XML file %s is too long" % f)
                    if "<BlastOutput>" not in header:
                        raise ValueError("%s is not a BLAST XML file:\n%s\n..." % (f, header))
                    if index == 0:
                        out.write(header)
                        old_header = header
                    elif old_header[:300] != header[:300]:
                        # Enough to check <BlastOutput_program> and <BlastOutput_version> match
                        raise ValueError("BLAST XML headers don't match for %s and %s - have:\n%s\n...\n\nAnd:\n%s\n...\n"
                                         % (split_files[0], f, old_header[:300], header[:300]))
                    else:
                        # NOTE(review): the leading whitespace inside this
                        # literal (and the closing-tag literal below) may have
                        # been collapsed in the copy this was reviewed from -
                        # confirm against the repository version.
                        out.write(" <Iteration>\n")
                    for line in h:
                        if "</BlastOutput_iterations>" in line:
                            break
                        # TODO - Increment <Iteration_iter-num> and if required automatic query names
                        # like <Iteration_query-ID>Query_3</Iteration_query-ID> to be increasing?
                        out.write(line)
                finally:
                    # Closing an already closed handle is harmless.
                    h.close()
            out.write(" </BlastOutput_iterations>\n")
            out.write("</BlastOutput>\n")
148 | |
4 | 149 |
class _BlastDb(object):
    """Base class for BLAST database datatype.

    Intended to be mixed in ahead of ``Data``; the concrete classes provide
    ``file_ext``, which ``display_data`` reads to choose a title.
    """

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text."""
        if not dataset.dataset.purged:
            dataset.peek = "BLAST database (multiple files)"
            dataset.blurb = "BLAST database (multiple files)"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Create HTML content, used for displaying peek."""
        try:
            return dataset.peek
        except Exception:
            return "BLAST database (multiple files)"

    def display_data(self, trans, data, preview=False, filename=None,
                     to_ext=None, size=None, offset=None, **kwd):
        """Documented as an old display method, but still gets called via tests etc

        This allows us to format the data shown in the central pane via the "eye" icon.

        When ``filename`` names a child file (anything other than ``None`` or
        ``"index"``) the call is passed straight to ``Data.display_data``.
        Otherwise a small HTML page is returned whose body is the text of the
        index file ``data.file_name``, or a generic title if that text is
        empty or cannot be read.
        """
        if filename is not None and filename != "index":
            # Change nothing - important for the unit tests to access child files:
            return Data.display_data(self, trans, data, preview, filename,
                                     to_ext, size, offset, **kwd)
        if self.file_ext == "blastdbn":
            title = "This is a nucleotide BLAST database"
        elif self.file_ext == "blastdbp":
            title = "This is a protein BLAST database"
        elif self.file_ext == "blastdbd":
            title = "This is a domain BLAST database"
        else:
            # Error?
            title = "This is a BLAST database."
        msg = ""
        try:
            # Try to use any text recorded in the dummy index file.
            # The default text mode is used because the "U" flag is rejected
            # by Python 3.11+, which made this read fail silently.
            with open(data.file_name) as handle:
                msg = handle.read().strip()
        except Exception:
            # Deliberately best-effort: fall back to the generic title below.
            pass
        if not msg:
            msg = title
        # Galaxy assumes HTML for the display of composite datatypes,
        # NOTE(review): title and msg are inserted without HTML escaping -
        # confirm the index file content is trusted.
        return "<html><head><title>%s</title></head><body><pre>%s</pre></body></html>" % (title, msg)

    @staticmethod
    def merge(split_files, output_file):
        """Merge BLAST databases (not implemented for now)."""
        raise NotImplementedError("Merging BLAST databases is non-trivial (do this via makeblastdb?)")

    @classmethod
    def split(cls, input_datasets, subdir_generator_function, split_params):
        """Split a BLAST database (not implemented for now).

        Returns None when ``split_params`` is None (nothing to split);
        any other value raises NotImplementedError.
        """
        if split_params is None:
            return None
        raise NotImplementedError("Can't split BLAST databases")
210 | |
211 | |
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
class BlastNucDb(_BlastDb, Data):
    """Composite datatype for a nucleotide BLAST database."""
    file_ext = 'blastdbn'
    allow_datatype_change = False
    composite_type = 'basic'

    def __init__(self, **kwd):
        """Register the component files of a nucleotide BLAST database."""
        Data.__init__(self, **kwd)

        # Mandatory binary components.
        required_parts = (
            'blastdb.nhr',  # sequence headers
            'blastdb.nin',  # index file
            'blastdb.nsq',  # nucleotide sequences
        )
        for part in required_parts:
            self.add_composite_file(part, is_binary=True)

        # Optional components as (file name, is_binary) pairs, registered in
        # this order.
        optional_parts = (
            ('blastdb.nal', False),  # alias ( -gi_mask option of makeblastdb)
            ('blastdb.nhd', True),  # sorted sequence hash values ( -hash_index option of makeblastdb)
            ('blastdb.nhi', True),  # index of sequence hash values ( -hash_index option of makeblastdb)
            ('blastdb.nnd', True),  # sorted GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
            ('blastdb.nni', True),  # index of GI values ( -parse_seqids option of makeblastdb and gi present in the description lines)
            ('blastdb.nog', True),  # OID->GI lookup file ( -hash_index or -parse_seqids option of makeblastdb)
            ('blastdb.nsd', True),  # sorted sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
            ('blastdb.nsi', True),  # index of sequence accession values ( -hash_index or -parse_seqids option of makeblastdb)
            # Not registered: 'blastdb.00.idx' (binary, optional), the first
            # volume of the MegaBLAST index generated by makembindex. It would
            # need repeating for each index volume, with filename extensions
            # like '.01.idx', '.02.idx', etc.
            ('blastdb.shd', True),  # MegaBLAST index superheader (-old_style_index false option of makembindex)
            # Not registered (all binary, optional):
            #   'blastdb.naa' - index of a WriteDB column for e.g. mask data
            #   'blastdb.nab' - data of a WriteDB column
            #   'blastdb.nac' - multiple byte order for a WriteDB column
            # These three would need repeating for each WriteDB column, with
            # filename extensions like ('.nba', '.nbb', '.nbc'),
            # ('.nca', '.ncb', '.ncc'), etc.
        )
        for part, binary in optional_parts:
            self.add_composite_file(part, is_binary=binary, optional=True)
4 | 238 |
5
b3a3ba0c1d47
Uploaded v0.0.15 which updates the BLAST database definitions.
peterjc
parents:
4
diff
changeset
|
239 |
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
class BlastProtDb(_BlastDb, Data):
    """Composite datatype for a protein BLAST database."""
    file_ext = 'blastdbp'
    allow_datatype_change = False
    composite_type = 'basic'

    def __init__(self, **kwd):
        """Register the component files of a protein BLAST database."""
        Data.__init__(self, **kwd)
        # Component file comments are as in BlastNucDb except where noted

        # Mandatory binary components.
        required_parts = (
            'blastdb.phr',
            'blastdb.pin',
            'blastdb.psq',  # protein sequences
        )
        for part in required_parts:
            self.add_composite_file(part, is_binary=True)

        # Optional binary components, registered in this order.
        optional_parts = (
            'blastdb.phd',
            'blastdb.phi',
            'blastdb.pnd',
            'blastdb.pni',
            'blastdb.pog',
            'blastdb.psd',
            'blastdb.psi',
            # Not registered (all binary, optional): 'blastdb.paa',
            # 'blastdb.pab', 'blastdb.pac'. These three would need repeating
            # for each WriteDB column, with filename extensions like
            # ('.pba', '.pbb', '.pbc'), ('.pca', '.pcb', '.pcc'), etc.
        )
        for part in optional_parts:
            self.add_composite_file(part, is_binary=True, optional=True)
10 | 263 |
264 | |
14
623a3fbe5340
planemo upload for repository https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ commit 3f6b5c953d522a724bbcd403bcb86f1e2757a556-dirty
peterjc
parents:
12
diff
changeset
|
class BlastDomainDb(_BlastDb, Data):
    """Composite datatype for a domain BLAST database."""
    file_ext = 'blastdbd'
    allow_datatype_change = False
    composite_type = 'basic'

    def __init__(self, **kwd):
        """Register the component files of a domain BLAST database."""
        Data.__init__(self, **kwd)

        # Mandatory binary components.
        for part in ('blastdb.phr', 'blastdb.pin', 'blastdb.psq'):
            self.add_composite_file(part, is_binary=True)

        # Optional binary components, registered in this order.
        optional_parts = (
            'blastdb.freq',
            'blastdb.loo',
            'blastdb.psd',
            'blastdb.psi',
            'blastdb.rps',
            'blastdb.aux',
        )
        for part in optional_parts:
            self.add_composite_file(part, is_binary=True, optional=True)