annotate proteomics.py @ 0:7101f7e4b00b

Uploaded
author iracooke
date Wed, 08 May 2013 03:25:50 -0400
parents
children b22ebbb05260
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
1 """
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
2 Proteomics format classes
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
3 """
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
4 import logging
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
5 import re
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
6 from galaxy.datatypes.data import *
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
7 from galaxy.datatypes.xml import *
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
8 from galaxy.datatypes.sniff import *
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
9 from galaxy.datatypes.binary import *
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
10 from galaxy.datatypes.interval import *
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
11
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
12 log = logging.getLogger(__name__)
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
13
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class ProtGff(Gff):
    """Tab delimited data in Gff format"""
    file_ext = "prot_gff"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text on ``dataset``.

        A purged dataset gets fixed placeholder strings instead of a file peek.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'Proteogenomics GFF'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """Return True if one of the first three lines starts with ``##gff-version``.

        Each line is stripped of surrounding whitespace before the check.
        """
        # ``with`` guarantees the handle is closed even if reading raises.
        with open(filename) as handle:
            for _ in range(3):
                if handle.readline().strip().startswith("##gff-version"):
                    return True
        return False
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
37
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
38
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class Xls(Binary):
    """Class describing a binary excel spreadsheet file"""
    file_ext = "xls"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set a fixed peek string and use the human-readable file size as blurb.

        ``is_multi_byte`` is accepted for signature compatibility and is not used.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Excel Spreadsheet file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback; no longer swallows KeyboardInterrupt/SystemExit.
            return "Binary xls file (%s)" % (data.nice_size(dataset.get_size()))
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
55
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class ProteomicsXml(GenericXml):
    """ An enhanced XML datatype used to reuse code across several
    proteomic/mass-spec datatypes.

    ``sniff`` reads ``self.root`` (a regular-expression fragment naming the
    root element) and ``set_peek`` reads ``self.blurb``; neither is assigned
    in this class body.
    """

    def sniff(self, filename):
        """ Determines whether the file is the correct XML type.

        Leading lines that start with ``<?`` are skipped; the first line that
        does not is matched against ``<root`` or ``<ns:root`` for any
        namespace prefix ``ns``. Returns True on a match, False otherwise
        (including when the file ends before such a line is found).
        """
        with open(filename, 'r') as contents:
            line = contents.readline()
            # readline() returns '' at end of file, which also ends this loop.
            while line.startswith('<?'):
                line = contents.readline()
        # Raw string so that \w reaches the regex engine unaltered.
        pattern = r'^<(\w*:)?%s' % self.root
        return re.match(pattern, line) is not None

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = self.blurb
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
78
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class PepXml(ProteomicsXml):
    """pepXML data"""
    file_ext = "pepxml"
    # Blurb text shown for datasets of this type.
    blurb = 'pepXML data'
    # Root element name matched by ProteomicsXml.sniff.
    root = "msms_pipeline_analysis"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
84
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
85
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class MzML(ProteomicsXml):
    """mzML data"""
    file_ext = "mzml"
    # Blurb text shown for datasets of this type.
    blurb = 'mzML Mass Spectrometry data'
    # Regex alternation: the root element may be either mzML or indexedmzML.
    root = "(mzML|indexedmzML)"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
91
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
92
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class ProtXML(ProteomicsXml):
    """protXML data"""
    file_ext = "protxml"
    # Blurb text shown for datasets of this type.
    blurb = 'prot XML Search Results'
    # Root element name matched by ProteomicsXml.sniff.
    root = "protein_summary"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
98
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
99
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class MzXML(ProteomicsXml):
    """mzXML data"""
    file_ext = "mzXML"
    # Blurb text shown for datasets of this type.
    blurb = "mzXML Mass Spectrometry data"
    # Root element name matched by ProteomicsXml.sniff.
    root = "mzXML"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
105
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
# PSI datatypes
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class MzIdentML(ProteomicsXml):
    """mzIdentML data"""
    file_ext = "mzid"
    # Blurb text shown for datasets of this type.
    blurb = "XML identified peptides and proteins."
    # Root element name matched by ProteomicsXml.sniff.
    root = "MzIdentML"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
111
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
112
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class TraML(ProteomicsXml):
    """TraML data"""
    file_ext = "traML"
    # Blurb text shown for datasets of this type.
    blurb = "TraML transition list"
    # Root element name matched by ProteomicsXml.sniff.
    root = "TraML"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
117
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
118
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class MzQuantML(ProteomicsXml):
    """mzQuantML data"""
    file_ext = "mzq"
    # Blurb text shown for datasets of this type.
    blurb = "XML quantification data"
    # Root element name matched by ProteomicsXml.sniff.
    root = "MzQuantML"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
123
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
124
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class Mgf(Text):
    """Mascot Generic Format data"""
    file_ext = "mgf"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'mgf Mascot Generic Format'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """Return True if a line near the top of the file is exactly ``BEGIN IONS``.

        Trailing CR/LF characters are removed before comparison. Returns False
        when the scan limit or the end of the file is reached without a match.
        """
        mgf_begin_ions = "BEGIN IONS"
        max_lines = 100

        # open() replaces the Python-2-only file() builtin, and ``with``
        # closes the handle on every exit path.
        with open(filename) as handle:
            for i, line in enumerate(handle):
                if line.rstrip('\n\r') == mgf_begin_ions:
                    return True
                # The bound is tested after the comparison, so lines with
                # index 0..max_lines+1 are examined (unchanged behaviour).
                if i > max_lines:
                    break
        # Explicit False; short files previously fell through and returned None.
        return False
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
149
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
150
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class MascotDat(Text):
    """Mascot search results """
    file_ext = "mascotdat"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'mascotdat Mascot Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """Return True if a line near the top of the file is the Mascot MIME header.

        Trailing CR/LF characters are removed and the line must then equal the
        expected header exactly. Returns False when the scan limit or the end
        of the file is reached without a match.
        """
        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
        max_lines = 10

        # open() replaces the Python-2-only file() builtin, and ``with``
        # closes the handle on every exit path.
        with open(filename) as handle:
            for i, line in enumerate(handle):
                if line.rstrip('\n\r') == mime_version:
                    return True
                # The bound is tested after the comparison, so lines with
                # index 0..max_lines+1 are examined (unchanged behaviour).
                if i > max_lines:
                    break
        # Explicit False; short files previously fell through and returned None.
        return False
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
175
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
176
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class RAW(Binary):
    """Class describing a Thermo Finnigan binary RAW file"""
    file_ext = "raw"

    def sniff(self, filename):
        """Return True if the first 20 bytes contain the UTF-16-style ``Finnigan`` marker.

        Returns False when the marker is absent or the file cannot be read.
        """
        # Thermo Finnigan RAW format is proprietary and hence not well documented.
        # Files start with 2 bytes that seem to differ followed by F\0i\0n\0n\0i\0g\0a\0n
        # This combination represents 17 bytes, but to play safe we read 20 bytes from
        # the start of the file.
        try:
            # Binary mode: the header is raw bytes, not text.
            with open(filename, 'rb') as handle:
                header = handle.read(20)
        except (IOError, OSError):
            return False
        # Search the bytes directly; no hex round-trip is needed.
        return b'F\0i\0n\0n\0i\0g\0a\0n' in header

    def set_peek(self, dataset, is_multi_byte=False):
        """Set a fixed peek string and use the human-readable file size as blurb.

        ``is_multi_byte`` is accepted for signature compatibility and is not used.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Thermo Finnigan RAW file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a generic description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback; no longer swallows KeyboardInterrupt/SystemExit.
            return "Thermo Finnigan RAW file (%s)" % (data.nice_size(dataset.get_size()))
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
206
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
207
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
# Register the RAW sniffer only when this Galaxy's Binary class exposes the hook.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('RAW', 'RAW', RAW)
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
210
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
211
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class Msp(Text):
    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
    file_ext = "msp"

    @staticmethod
    def next_line_starts_with(contents, prefix):
        """Consume one line from ``contents`` and report whether it starts with ``prefix``.

        At end of file ``readline()`` yields an empty string, so the result is
        False for any non-empty prefix.
        """
        return contents.readline().startswith(prefix)

    def sniff(self, filename):
        """ Determines whether the file is a NIST MSP output file.

        The first line must start with ``Name:`` and the second with ``MW:``.

        >>> fname = get_test_fname('test.msp')
        >>> Msp().sniff(fname)
        True
        >>> fname = get_test_fname('test.mzXML')
        >>> Msp().sniff(fname)
        False
        """
        with open(filename, 'r') as contents:
            # ``and`` short-circuits: the second line is read only if the first matched.
            return Msp.next_line_starts_with(contents, "Name:") and Msp.next_line_starts_with(contents, "MW:")
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
233
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class Ms2(Text):
    """Text datatype for ms2 files (extension ``ms2``)."""
    file_ext = "ms2"

    def sniff(self, filename):
        """ Determines whether the file is a valid ms2 file.

        Collects the leading run of lines that start with ``H<TAB>`` and
        returns True only if every required header field appears among them.

        >>> fname = get_test_fname('test.msp')
        >>> Ms2().sniff(fname)
        False
        >>> fname = get_test_fname('test.ms2')
        >>> Ms2().sniff(fname)
        True
        """
        required_fields = ['CreationDate', 'Extractor', 'ExtractorVersion', 'ExtractorOptions']

        header_lines = []
        with open(filename, 'r') as contents:
            # Iterating the handle stops at end of file. The previous
            # readline() loop ignored the empty string returned at EOF and
            # never terminated for empty or header-only files.
            for line in contents:
                if not line.startswith('H\t'):
                    break
                header_lines.append(line)

        for header_field in required_fields:
            # NOTE: prefix match, so an 'ExtractorVersion' line also
            # satisfies the 'Extractor' requirement (unchanged behaviour).
            wanted = 'H\t%s' % (header_field)
            if not any(header_line.startswith(wanted) for header_line in header_lines):
                return False

        return True
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
268
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
# TODO: HLF is a binary format with no sniffer yet; it is only registered by file extension.
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
class XHunterAslFormat(Binary):
    """Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html"""
    file_ext = "hlf"
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
273
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
274
7101f7e4b00b Uploaded
iracooke
parents:
diff changeset
# Register the hlf extension only when this Galaxy's Binary class exposes the hook.
if hasattr(Binary, 'register_unsniffable_binary_ext'):
    Binary.register_unsniffable_binary_ext('hlf')