comparison fasta.py @ 0:1a12c379df0c draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rRNA commit 1973f3035c10db80883d80847ea254289f5cce2a-dirty
author bgruening
date Thu, 17 Sep 2015 16:50:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1a12c379df0c
1
2 # Copyright (C) 2003, 2004, 2006 by Thomas Mailund <mailund@birc.au.dk>
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or (at
7 # your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307,
17 # USA.
18
19 """
20 A parser for FASTA files.
21
22 Copyright (C) 2003, 2004, 2006 by Thomas Mailund <mailund@birc.au.dk>
23 """
24
class MalformedInput(Exception):
    """Exception raised when the input file does not look like a fasta file.

    The class derives from ``Exception`` so that it can be raised and caught
    on every Python version; Python 3 refuses to raise objects whose class
    does not inherit from ``BaseException``.
    """
    pass
28
class FastaRecord:
    """Wrapper around a fasta record.

    A record stores two attributes: ``header`` (written after a leading '>'
    by ``__str__``) and ``sequence``.
    """

    # Number of sequence characters placed on each line by __str__.
    LINE_WIDTH = 60

    def __init__(self, header, sequence):
        """Create a record with the given header and sequence."""
        self.header = header
        self.sequence = sequence

    def __str__(self):
        """Return the record as fasta text.

        The first line is '>' followed by the header; the sequence follows
        in chunks of ``LINE_WIDTH`` characters, one chunk per line.  An
        empty sequence produces the header line only.  No trailing newline
        is appended.
        """
        width = self.LINE_WIDTH
        lines = ['>' + self.header]
        # range() rather than xrange() so the code runs on Python 2 and 3.
        for start in range(0, len(self.sequence), width):
            lines.append(self.sequence[start:start + width])
        return '\n'.join(lines)
41
42
def _fasta_itr_from_file(file):
    """Provide an iteration through the fasta records in file.

    ``file`` must offer ``readline()`` and line iteration.  This is a
    generator: nothing is read until the first record is requested.

    Yields one FastaRecord per '>' header line.  The header is the text
    after '>' and the sequence is the concatenation of the stripped,
    non-blank lines that follow it.

    Raises MalformedInput if the first line does not start with '>'.
    """
    header = file.readline().strip()
    # startswith() also covers an empty or blank first line; indexing
    # header[0] there would raise IndexError instead of MalformedInput.
    if not header.startswith('>'):
        raise MalformedInput()
    header = header[1:]

    chunks = []
    for line in file:
        line = line.strip()  # remove newline and surrounding whitespace
        if not line:
            continue  # blank lines are ignored
        if line.startswith('>'):
            # A new header closes the record collected so far.
            yield FastaRecord(header, ''.join(chunks))
            header = line[1:]
            chunks = []
            continue
        chunks.append(line)

    # The last record has no following header line to close it.
    yield FastaRecord(header, ''.join(chunks))
66
67
def _fasta_itr_from_name(fname):
    """Provide an iteration through the fasta records in the file named fname.

    The file is opened when the first record is requested.  The ``with``
    block closes it when iteration finishes, when the parser raises, or
    when the generator is closed before being exhausted.
    """
    with open(fname) as f:
        for rec in _fasta_itr_from_file(f):
            yield rec
74
75
76 def _fasta_itr(src):
77 """Provide an iteration through the fasta records in file `src'.
78
79 Here `src' can be either a file object or the name of a file.
80 """
81 if type(src) == str:
82 return _fasta_itr_from_name(src)
83 elif type(src) == file:
84 return _fasta_itr_from_file(src)
85 else:
86 raise TypeError
87
def fasta_get_by_name(itr, name):
    """Return the first record in itr whose header equals name.

    Surrounding whitespace is ignored on both the header and `name'.
    Records are consumed from `itr' up to and including the match; None is
    returned when no record matches.
    """
    wanted = name.strip()
    matches = (rec for rec in itr if rec.header.strip() == wanted)
    return next(matches, None)
95
class fasta_itr:
    """An iterator through a sequence of fasta records.

    Supports both the Python 3 (``__next__``) and the Python 2 (``next``)
    iterator protocol.
    """

    def __init__(self, src):
        """Create an iterator through the records in src."""
        self.__itr = _fasta_itr(src)

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def __next__(self):
        """Return the next record, raising StopIteration at the end."""
        return next(self.__itr)

    next = __next__  # Python 2 spelling of the iterator protocol

    def __getitem__(self, name):
        """Return the record whose header matches `name', or None.

        The search advances this iterator, so the records it passes over
        are consumed.
        """
        return fasta_get_by_name(iter(self), name)
109
class fasta_slice:
    """Provide an iteration through the fasta records in file `src', from
    index `start' to index `stop'.

    Here `src' can be either a file object or the name of a file.  Record
    indices count from 0; `start' is inclusive and `stop' is exclusive.
    Supports both the Python 3 (``__next__``) and the Python 2 (``next``)
    iterator protocol.
    """

    def __init__(self, src, start, stop):
        """Provide an iteration through the fasta records in file `src', from
        index `start' to index `stop'.

        Here `src' can be either a file object or the name of a file.
        """
        self.__itr = _fasta_itr(src)
        self.__current = 0  # index of the next record in the underlying iterator
        self.__start = start
        self.__stop = stop

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def __next__(self):
        """Return the next record of the slice.

        Raises StopIteration once index `stop' is reached or the underlying
        records run out.
        """
        while self.__current < self.__start:
            # skip past first records until we get to `start'
            next(self.__itr)
            self.__current += 1

        if self.__current >= self.__stop:
            # stop after `stop'
            raise StopIteration

        self.__current += 1
        return next(self.__itr)

    next = __next__  # Python 2 spelling of the iterator protocol

    def __getitem__(self, name):
        """Return the record in the slice whose header matches `name', or None.

        The search advances this iterator, so the records it passes over
        are consumed.
        """
        return fasta_get_by_name(iter(self), name)
135
136 if self.__current >= self.__stop:
137 # stop after `stop'
138 raise StopIteration
139
140 self.__current += 1
141 return self.__itr.next()
142
143 def __getitem__(self,name):
144 return fasta_get_by_name(iter(self),name)
145
146 def get_sequence(src,name):
147 "Return the record in src with the given name."
148 return fasta_itr(src)[name]
149
150
151 # TESTING...
if __name__ == '__main__':
    # Manual smoke test: expects exactly one argument, the path of a fasta
    # file.  The print calls are written so that they produce the same
    # output on Python 2 and Python 3: a single parenthesised argument, and
    # print('') for blank lines.
    import sys
    if len(sys.argv) != 2:
        print("wrong programmer error")
        sys.exit(2)

    print('iterating through all sequences in input file')
    for rec in fasta_itr(sys.argv[1]):
        print(rec)
    print('')

    #print 'input sequences (terminated with ^D)'
    #for rec in fasta_itr(sys.stdin):
    #    print rec
    #print

    print('iterating through input, from the second sequence')
    for rec in fasta_slice(sys.argv[1], 1, 3):
        print(rec)
    print('')

    print('the sequence for "bar"')
    print(fasta_itr(sys.argv[1])["bar"])
    print(fasta_slice(sys.argv[1], 0, 3)["bar"])
    print(get_sequence(sys.argv[1], "bar"))
    print('')
178
179
180