Mercurial > repos > bgruening > rrna
view fasta.py @ 0:1a12c379df0c draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rRNA commit 1973f3035c10db80883d80847ea254289f5cce2a-dirty
author | bgruening |
---|---|
date | Thu, 17 Sep 2015 16:50:41 -0400 |
parents | |
children |
line wrap: on
line source
# Copyright (C) 2003, 2004, 2006 by Thomas Mailund <mailund@birc.au.dk>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307,
# USA.

"""
A parser for FASTA files.

Copyright (C) 2003, 2004, 2006 by Thomas Mailund <mailund@birc.au.dk>
"""

# The module is written to run unchanged on Python 2.6+ and Python 3.
try:
    _STRING_TYPES = (basestring,)  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = (str,)  # Python 3


class MalformedInput(Exception):
    """Exception raised when the input file does not look like a fasta file."""


class FastaRecord(object):
    """Wrapper around a fasta record (a header plus its sequence)."""

    def __init__(self, header, sequence):
        """Create a record with the given header and sequence.

        `header' is stored without the leading '>'; `sequence' is the
        sequence as one string without line breaks.
        """
        self.header = header
        self.sequence = sequence

    def __str__(self):
        """Format the record as fasta text, 60 sequence characters per line."""
        lines = ['>' + self.header]
        lines.extend(self.sequence[i:i + 60]
                     for i in range(0, len(self.sequence), 60))
        return '\n'.join(lines)


def _fasta_itr_from_file(file):
    """Provide an iteration through the fasta records in file.

    `file' must offer readline() and line iteration.  The first line has to
    be a '>' header line; otherwise (including for empty input)
    MalformedInput is raised when the first record is requested.
    """
    header = file.readline().strip()
    # startswith() also copes with an empty first line, where indexing
    # header[0] would fail with an IndexError.
    if not header.startswith('>'):
        raise MalformedInput(
            "expected a '>' header line but got %r" % header)
    header = header[1:]

    chunks = []
    for line in file:
        line = line.strip()  # remove newline
        if not line:
            continue
        if line.startswith('>'):
            # A new header ends the record collected so far.
            yield FastaRecord(header, ''.join(chunks))
            header = line[1:]
            chunks = []
        else:
            chunks.append(line)

    # The last record is ended by end-of-file rather than by a header.
    yield FastaRecord(header, ''.join(chunks))


def _fasta_itr_from_name(fname):
    """Provide an iteration through the fasta records in the file named fname.

    The file is closed when the iteration finishes, and also when the
    generator is closed or discarded before reaching the end.
    """
    with open(fname) as f:
        for rec in _fasta_itr_from_file(f):
            yield rec


def _fasta_itr(src):
    """Provide an iteration through the fasta records in file `src'.

    Here `src' can be either a file object (anything with a readline()
    method) or the name of a file.  Any other argument raises TypeError.
    """
    if isinstance(src, _STRING_TYPES):
        return _fasta_itr_from_name(src)
    if hasattr(src, 'readline'):
        return _fasta_itr_from_file(src)
    raise TypeError(
        'src must be a file name or a file object, not %s'
        % type(src).__name__)


def fasta_get_by_name(itr, name):
    """Return the record in itr with the given name.

    Headers and `name' are compared after stripping surrounding whitespace.
    Returns None when no record matches.  `itr' is consumed up to and
    including the matching record.
    """
    wanted = name.strip()
    for rec in itr:
        if rec.header.strip() == wanted:
            return rec
    return None


class fasta_itr(object):
    """An iterator through a sequence of fasta records."""

    def __init__(self, src):
        """Create an iterator through the records in src.

        `src' can be either a file object or the name of a file.
        """
        self.__itr = _fasta_itr(src)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next record; raise StopIteration at the end."""
        return next(self.__itr)

    next = __next__  # Python 2 spelling of the iterator protocol

    def __getitem__(self, name):
        """Return the record named `name', or None if there is none.

        The lookup advances this iterator, so records up to and including
        the match are consumed.
        """
        return fasta_get_by_name(iter(self), name)


class fasta_slice(object):
    """Provide an iteration through the fasta records in file `src', from
    index `start' to index `stop'.

    Here `src' can be either a file object or the name of a file.
    """

    def __init__(self, src, start, stop):
        """Provide an iteration through the fasta records in file `src', from
        index `start' to index `stop'.

        Here `src' can be either a file object or the name of a file.
        Indices count records from 0; `start' is included, `stop' is not.
        """
        self.__itr = _fasta_itr(src)
        self.__current = 0
        self.__start = start
        self.__stop = stop

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next record of the slice; raise StopIteration at the end."""
        while self.__current < self.__start:
            # skip past first records until we get to `start'
            next(self.__itr)
            self.__current += 1

        if self.__current >= self.__stop:
            # stop after `stop'
            raise StopIteration

        self.__current += 1
        return next(self.__itr)

    next = __next__  # Python 2 spelling of the iterator protocol

    def __getitem__(self, name):
        """Return the record named `name' within the slice, or None.

        The lookup advances this iterator, so records up to and including
        the match are consumed.
        """
        return fasta_get_by_name(iter(self), name)


def get_sequence(src, name):
    """Return the record in src with the given name (None if not found)."""
    return fasta_itr(src)[name]


# TESTING...
if __name__ == '__main__':
    # Small manual smoke test: run as `python fasta.py <fasta-file>`.
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # so the module stays importable under either interpreter.
    import sys

    if len(sys.argv) != 2:
        sys.stderr.write('usage: %s <fasta-file>\n' % sys.argv[0])
        sys.exit(2)

    print('iterating through all sequences in input file')
    for rec in fasta_itr(sys.argv[1]):
        print(rec)
    print('')

    # Optional interactive variant, kept disabled as in the original:
    # print('input sequences (terminated with ^D)')
    # for rec in fasta_itr(sys.stdin):
    #     print(rec)
    # print('')

    print('iterating through input, from the second sequence')
    for rec in fasta_slice(sys.argv[1], 1, 3):
        print(rec)
    print('')

    # Each lookup prints the matching record, or None when the file has no
    # record named "bar".
    print('the sequence for "bar"')
    print(fasta_itr(sys.argv[1])["bar"])
    print(fasta_slice(sys.argv[1], 0, 3)["bar"])
    print(get_sequence(sys.argv[1], "bar"))
    print('')