annotate uniqprimer-0.5.0/primertools/fastaparser.py @ 2:05ae1ce478bc draft default tip

Uploaded
author dereeper
date Fri, 08 Jun 2018 10:49:07 -0400
parents cdd8f911ad91
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
1 '''
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
2 Created on Jan 1, 2011
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
3
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
4 @author: John L. Herndon
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
5 @contact: herndon@cs.colostate.edu
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
6 @organization: Colorado State University
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
7 @group: Computer Science Department, Asa Ben-Hur's laboratory
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
8 '''
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
9
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
10
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
11 import utils
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
12 import primersequence
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
13
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
14 from Bio import SeqIO
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
15 from Bio import Seq
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
16 from Bio import Alphabet
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
17
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
def parseFastaFileAsPrimerSequence( fileName ):
    '''
    Parse a fasta file into PrimerSequence objects.

    Every record read from the file is wrapped as
    primersequence.PrimerSequence( id, length, seq ) and stored under its
    record id. If the same id occurs more than once, the last record wins.

    @param fileName: path of the fasta file to read
    @return: dict mapping record id -> primersequence.PrimerSequence
    '''
    utils.logMessage( "fastaparser::parseFastaFileAsPrimerSequence( )", "parsing fasta file {0}".format( fileName ) )
    returnValue = { }

    # All records are consumed inside this function, so the handle can be
    # closed deterministically instead of being left to the garbage collector.
    with open( fileName ) as handle:
        for sequence in SeqIO.parse( handle, "fasta" ):
            seqdata = primersequence.PrimerSequence( sequence.id, len( sequence ), sequence.seq )
            returnValue[ sequence.id ] = seqdata

    utils.logMessage( "fastaparser::parseFastaFileAsPrimerSequence( )", "read {0} sequences".format( len( returnValue ) ) )

    return returnValue
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
32
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
def parseFastaFile( fileName ):
    '''
    Parse a fasta file and return a lazy iterator over its records.

    The records are whatever SeqIO.parse( ..., "fasta" ) yields; they are
    produced on demand, not collected into a list. The file is opened
    immediately, so a missing or unreadable file raises here rather than on
    first iteration. The handle is closed once the iterator is exhausted or
    closed.

    @param fileName: path of the fasta file to read
    @return: iterator over the parsed records
    '''
    utils.logMessage( "fastaparser::parseFastaFile( )", "parsing fasta file {0}".format( fileName ) )

    # Open eagerly so open errors surface at call time, as callers expect.
    handle = open( fileName )

    def iterRecords( ):
        # The finally clause ties the handle's lifetime to the iteration.
        # If the iterator is never started, the handle is released when it
        # is garbage collected.
        try:
            for record in SeqIO.parse( handle, "fasta" ):
                yield record
        finally:
            handle.close( )

    return iterRecords( )
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
42
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
def writeFastaFile( sequences, fileName ):
    '''
    Write a set of sequences to a fasta file.

    Each sequence is flattened to a string by concatenating str( ) of its
    elements and written as a record with id "seq_<index>", numbered from 0
    in iteration order.

    @param sequences: sized iterable of sequences ( len( ) is used for logging )
    @param fileName: path of the fasta file to create or overwrite
    @return: the name of the new file ( fileName )
    '''
    utils.logMessage( "PrimerManager::writeFastaFile( )", "Writing {0} sequences to fasta file".format( len( sequences ) ) )

    seqRecords = [ ]
    for i, sequence in enumerate( sequences ):
        # "".join works without the reduce( ) builtin (absent in Python 3)
        # and gives an empty string for an empty sequence instead of raising.
        seqStr = "".join( str( element ) for element in sequence )
        seqRecord = SeqIO.SeqRecord( Seq.Seq( seqStr, Alphabet.IUPAC.extended_dna ), id="seq_{0}".format( i ) )
        seqRecords.append( seqRecord )

    # Close the handle before returning so the data is flushed to disk by the
    # time the caller uses the returned file name.
    with open( fileName, "w" ) as handle:
        SeqIO.write( seqRecords, handle, "fasta" )

    utils.logMessage( "PrimerManager::writeFastaFile( )", "writing fasta file complete" )
    return fileName
cdd8f911ad91 Uploaded
dereeper
parents:
diff changeset
63