Mercurial > repos > bgruening > rrna
comparison fasta.py @ 0:1a12c379df0c draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rRNA commit 1973f3035c10db80883d80847ea254289f5cce2a-dirty
author | bgruening |
---|---|
date | Thu, 17 Sep 2015 16:50:41 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1a12c379df0c |
---|---|
1 | |
2 # Copyright (C) 2003, 2004, 2006 by Thomas Mailund <mailund@birc.au.dk> | |
3 # | |
4 # This program is free software; you can redistribute it and/or modify | |
5 # it under the terms of the GNU General Public License as published by | |
6 # the Free Software Foundation; either version 2 of the License, or (at | |
7 # your option) any later version. | |
8 # | |
9 # This program is distributed in the hope that it will be useful, but | |
10 # WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 # General Public License for more details. | |
13 # | |
14 # You should have received a copy of the GNU General Public License | |
15 # along with this program; if not, write to the Free Software | |
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, | |
17 # USA. | |
18 | |
19 """ | |
20 A parser for FASTA files. | |
21 | |
22 Copyright (C) 2003, 2004, 2006 by Thomas Mailund <mailund@birc.au.dk> | |
23 """ | |
24 | |
class MalformedInput(Exception):
    """Exception raised when the input file does not look like a fasta file.

    Derives from Exception so that it can be raised on every Python
    version and is caught by generic ``except Exception`` handlers.
    """
28 | |
class FastaRecord(object):
    """Wrapper around a fasta record.

    Attributes:
        header: the header text; __str__ writes it after a '>' marker.
        sequence: the sequence as one string.
    """

    # Number of sequence characters emitted per output line by __str__.
    LINE_WIDTH = 60

    def __init__(self, header, sequence):
        """Create a record with the given header and sequence."""
        self.header = header
        self.sequence = sequence

    def __str__(self):
        """Return the record as fasta text.

        The first line is '>' followed by the header; the sequence follows,
        wrapped at LINE_WIDTH characters per line.  There is no trailing
        newline.
        """
        width = self.LINE_WIDTH
        lines = ['>' + self.header]
        # range() instead of xrange(): xrange does not exist on Python 3.
        lines.extend(self.sequence[start:start + width]
                     for start in range(0, len(self.sequence), width))
        return '\n'.join(lines)
41 | |
42 | |
def _fasta_itr_from_file(file):
    """Provide an iteration through the fasta records in file.

    `file' must offer readline() and line-by-line iteration.  The first
    line has to be a header line starting with '>'; every later '>' line
    starts a new record.  Blank lines are skipped, and the remaining lines
    of a record are stripped and concatenated into its sequence.

    Yields one FastaRecord per header line.

    Raises MalformedInput (on the first iteration step, since this is a
    generator) when the first line does not start with '>'.  This covers
    an empty file as well.
    """
    header = file.readline().strip()
    # startswith() instead of header[0]: an empty file or a blank first
    # line gives '' here, and indexing it would raise IndexError rather
    # than the intended MalformedInput.
    if not header.startswith('>'):
        raise MalformedInput()
    header = header[1:]

    chunks = []
    for line in file:
        line = line.strip()  # remove newline and surrounding whitespace
        if not line:
            continue
        if line.startswith('>'):
            # A new header closes the record collected so far.
            yield FastaRecord(header, ''.join(chunks))
            header = line[1:]
            chunks = []
        else:
            chunks.append(line)

    # The last record has no following header to close it.
    yield FastaRecord(header, ''.join(chunks))
66 | |
67 | |
def _fasta_itr_from_name(fname):
    """Provide an iteration through the fasta records in the file named fname.

    The file is opened when iteration starts.  The `with' block closes it
    when the records are exhausted, and also when the parser raises or the
    generator is closed or discarded before reaching the end.
    """
    with open(fname) as f:
        for rec in _fasta_itr_from_file(f):
            yield rec
74 | |
75 | |
def _fasta_itr(src):
    """Provide an iteration through the fasta records in file `src'.

    Here `src' can be either the name of a file (a str) or a file object.
    Any object with a readline() method is treated as a file object, so
    file-like objects are accepted too.

    Raises TypeError for any other kind of `src'.
    """
    if isinstance(src, str):
        return _fasta_itr_from_name(src)
    # Duck-typed check: the `file' builtin does not exist on Python 3, and
    # an exact type comparison would also reject file-like objects.
    if hasattr(src, 'readline'):
        return _fasta_itr_from_file(src)
    raise TypeError('expected a file name or a file object, got %r'
                    % type(src).__name__)
87 | |
def fasta_get_by_name(itr, name):
    """Find the first record in itr whose header matches name.

    Surrounding whitespace is ignored on both the requested name and each
    record header.  Iteration stops at the first match.  Returns None when
    no record matches.
    """
    wanted = name.strip()
    matches = (record for record in itr if record.header.strip() == wanted)
    return next(matches, None)
95 | |
class fasta_itr(object):
    """An iterator through a sequence of fasta records.

    Supports the iterator protocol of both Python 3 (__next__) and
    Python 2 (next), plus lookup of a record by name with `itr[name]'.
    """

    def __init__(self, src):
        """Create an iterator through the records in src.

        `src' is handed to _fasta_itr, which accepts a file name or a
        file object.
        """
        self.__itr = _fasta_itr(src)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next record; raise StopIteration when exhausted."""
        # The builtin next() works on every Python version, whereas the
        # .next() method only exists on Python 2 iterators.
        return next(self.__itr)

    # Python 2 spelling of the iterator protocol.
    next = __next__

    def __getitem__(self, name):
        """Return the record whose header matches name, or None.

        The search advances this iterator: records up to and including
        the match are consumed.
        """
        return fasta_get_by_name(iter(self), name)
109 | |
class fasta_slice(object):
    """Provide an iteration through the fasta records in file `src', from
    index `start' to index `stop'.

    Here `src' can be either a file object or the name of a file.
    Indices count records from 0; `start' is inclusive and `stop' is
    exclusive.
    """

    def __init__(self, src, start, stop):
        """Provide an iteration through the fasta records in file `src', from
        index `start' to index `stop'.

        Here `src' can be either a file object or the name of a file.
        """
        self.__itr = _fasta_itr(src)
        self.__current = 0
        self.__start = start
        self.__stop = stop

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next record of the slice.

        Raises StopIteration once `stop' is reached or the underlying
        records run out, including while still skipping towards `start'.
        """
        # The builtin next() works on every Python version, whereas the
        # .next() method only exists on Python 2 iterators.
        while self.__current < self.__start:
            # skip past first records until we get to `start'
            next(self.__itr)
            self.__current += 1

        if self.__current >= self.__stop:
            # stop after `stop'
            raise StopIteration

        self.__current += 1
        return next(self.__itr)

    # Python 2 spelling of the iterator protocol.
    next = __next__

    def __getitem__(self, name):
        """Return the record in the slice whose header matches name, or None.

        The search advances this iterator: records up to and including
        the match are consumed.
        """
        return fasta_get_by_name(iter(self), name)
145 | |
def get_sequence(src, name):
    """Look up the record named `name' in `src' and return it.

    `src' is wrapped in a fasta_itr and the lookup is delegated to its
    item access, which yields None when no record matches.
    """
    records = fasta_itr(src)
    return records[name]
149 | |
150 | |
# TESTING...
# Manual smoke test: run as `python fasta.py <fasta-file>'.
if __name__ == '__main__':
    import sys
    if len(sys.argv) != 2:
        sys.stderr.write('usage: %s <fasta-file>\n' % sys.argv[0])
        sys.exit(2)

    # Every print below is a call with exactly one argument: that form
    # gives the same output under the Python 2 print statement and the
    # Python 3 print function.  print('') emits the blank separator line.
    print('iterating through all sequences in input file')
    for rec in fasta_itr(sys.argv[1]):
        print(rec)
    print('')

    #print('input sequences (terminated with ^D)')
    #for rec in fasta_itr(sys.stdin):
    #    print(rec)
    #print('')

    print('iterating through input, from the second sequence')
    for rec in fasta_slice(sys.argv[1], 1, 3):
        print(rec)
    print('')

    print('the sequence for "bar"')
    print(fasta_itr(sys.argv[1])["bar"])
    print(fasta_slice(sys.argv[1], 0, 3)["bar"])
    print(get_sequence(sys.argv[1], "bar"))
    print('')
178 | |
179 | |
180 |