comparison variant_effect_predictor/Bio/SeqIO/fasta.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: fasta.pm,v 1.41.2.4 2003/09/18 02:43:16 jason Exp $
2 # BioPerl module for Bio::SeqIO::fasta
3 #
4 # Cared for by Ewan Birney <birney@ebi.ac.uk>
5 # and Lincoln Stein <lstein@cshl.org>
6 #
7 # Copyright Ewan Birney & Lincoln Stein
8 #
9 # You may distribute this module under the same terms as perl itself
10 # _history
11 # October 18, 1999 Largely rewritten by Lincoln Stein
12
13 # POD documentation - main docs before the code
14
15 =head1 NAME
16
17 Bio::SeqIO::fasta - fasta sequence input/output stream
18
19 =head1 SYNOPSIS
20
21 Do not use this module directly. Use it via the Bio::SeqIO class.
22
23 =head1 DESCRIPTION
24
25 This object can transform Bio::Seq objects to and from fasta flat
26 file databases.
27
28 A method L<preferred_id_type()> can be used to specify the type of ID
29 we would like to parse from the fasta line. By default 'display' is
30 used, which means it parses everything from the '>' to the first space
31 and makes that the 'display_id' for the sequence.
32
33 Can be one of:
34 - accession
35 - accession.version
36 - display
37 - primary
38
39 =head1 FEEDBACK
40
41 =head2 Mailing Lists
42
43 User feedback is an integral part of the evolution of this and other
44 Bioperl modules. Send your comments and suggestions preferably to one
45 of the Bioperl mailing lists. Your participation is much appreciated.
46
47 bioperl-l@bioperl.org - General discussion
48 http://bioperl.org/MailList.shtml - About the mailing lists
49
50 =head2 Reporting Bugs
51
52 Report bugs to the Bioperl bug tracking system to help us keep track
53 the bugs and their resolution. Bug reports can be submitted via the
54 web:
55
56 http://bugzilla.bioperl.org/
57
58 =head1 AUTHORS - Ewan Birney & Lincoln Stein
59
60 Email: birney@ebi.ac.uk
61 lstein@cshl.org
62
63 =head1 CONTRIBUTORS
64
65 Jason Stajich, jason-at-bioperl.org
66
67 =head1 APPENDIX
68
69 The rest of the documentation details each of the object
70 methods. Internal methods are usually preceded with a _
71
72 =cut
73
74 # Let the code begin...
75
76 package Bio::SeqIO::fasta;
77 use vars qw(@ISA $WIDTH @SEQ_ID_TYPES $DEFAULT_SEQ_ID_TYPE);
78 use strict;
79 # Object preamble - inherits from Bio::Root::Object
80
81 use Bio::SeqIO;
82 use Bio::Seq::SeqFactory;
83 use Bio::Seq::SeqFastaSpeedFactory;
84
85 @ISA = qw(Bio::SeqIO);
86
87 @SEQ_ID_TYPES = qw(accession accession.version display primary);
88 $DEFAULT_SEQ_ID_TYPE = 'display';
89
90 BEGIN { $WIDTH = 60}
91
92 sub _initialize {
93 my($self,@args) = @_;
94 $self->SUPER::_initialize(@args);
95 my ($width) = $self->_rearrange([qw(WIDTH)], @args);
96 $width && $self->width($width);
97 unless ( defined $self->sequence_factory ) {
98 $self->sequence_factory(Bio::Seq::SeqFastaSpeedFactory->new());
99 }
100 }
101
102 =head2 next_seq
103
104 Title : next_seq
105 Usage : $seq = $stream->next_seq()
106 Function: returns the next sequence in the stream
107 Returns : Bio::Seq object
108 Args : NONE
109
110 =cut
111
112 sub next_seq {
113 my( $self ) = @_;
114 my $seq;
115 my $alphabet;
116 local $/ = "\n>";
117 return unless my $entry = $self->_readline;
118
119 chomp($entry);
120 if ($entry =~ m/\A\s*\Z/s) { # very first one
121 return unless $entry = $self->_readline;
122 chomp($entry);
123 }
124 $entry =~ s/^>//;
125
126 my ($top,$sequence) = split(/\n/,$entry,2);
127 defined $sequence && $sequence =~ s/>//g;
128 # my ($top,$sequence) = $entry =~ /^>?(.+?)\n+([^>]*)/s
129 # or $self->throw("Can't parse fasta entry");
130
131 my ($id,$fulldesc);
132 if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
133 ($id,$fulldesc) = ($1,$2);
134 }
135
136 if (defined $id && $id eq '') {$id=$fulldesc;} # FIX incase no space
137 # between > and name \AE
138 defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
139
140 # for empty sequences we need to know the mol.type
141 $alphabet = $self->alphabet();
142 if(defined $sequence && length($sequence) == 0) {
143 if(! defined($alphabet)) {
144 # let's default to dna
145 $alphabet = "dna";
146 }
147 } else {
148 # we don't need it really, so disable
149 $alphabet = undef;
150 }
151
152 $seq = $self->sequence_factory->create(
153 -seq => $sequence,
154 -id => $id,
155 # Ewan's note - I don't think this healthy
156 # but obviously to taste.
157 #-primary_id => $id,
158 -desc => $fulldesc,
159 -alphabet => $alphabet,
160 -direct => 1,
161 );
162
163
164
165
166 # if there wasn't one before, set the guessed type
167 unless ( defined $alphabet ) {
168 $self->alphabet($seq->alphabet());
169 }
170 return $seq;
171
172 }
173
174 =head2 write_seq
175
176 Title : write_seq
177 Usage : $stream->write_seq(@seq)
178 Function: writes the $seq object into the stream
179 Returns : 1 for success and 0 for error
180 Args : array of 1 to n Bio::PrimarySeqI objects
181
182
183 =cut
184
185 sub write_seq {
186 my ($self,@seq) = @_;
187 my $width = $self->width;
188 foreach my $seq (@seq) {
189 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
190 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
191
192 my $str = $seq->seq;
193 my $top;
194
195 # Allow for different ids
196 my $id_type = $self->preferred_id_type;
197 if( $id_type =~ /^acc/i ) {
198 $top = $seq->accession_number();
199 if( $id_type =~ /vers/i ) {
200 $top .= "." . $seq->version();
201 }
202 } elsif($id_type =~ /^displ/i ) {
203 $top = $seq->display_id();
204 } elsif($id_type =~ /^pri/i ) {
205 $top = $seq->primary_id();
206 }
207
208 if ($seq->can('desc') and my $desc = $seq->desc()) {
209 $desc =~ s/\n//g;
210 $top .= " $desc";
211 }
212 if(length($str) > 0) {
213 $str =~ s/(.{1,$width})/$1\n/g;
214 } else {
215 $str = "\n";
216 }
217 $self->_print (">",$top,"\n",$str) or return;
218 }
219
220 $self->flush if $self->_flush_on_write && defined $self->_fh;
221 return 1;
222 }
223
224 =head2 width
225
226 Title : width
227 Usage : $obj->width($newval)
228 Function: Get/Set the line width for FASTA output
229 Returns : value of width
230 Args : newvalue (optional)
231
232
233 =cut
234
235 sub width{
236 my ($self,$value) = @_;
237 if( defined $value) {
238 $self->{'width'} = $value;
239 }
240 return $self->{'width'} || $WIDTH;
241 }
242
243 =head2 preferred_id_type
244
245 Title : preferred_id_type
246 Usage : $obj->preferred_id_type('accession')
247 Function: Get/Set the preferred type of identifier to use in the ">ID" position
248 for FASTA output.
249 Returns : string, one of values defined in @Bio::SeqIO::fasta::SEQ_ID_TYPES.
250 Default = $Bio::SeqIO::fasta::DEFAULT_SEQ_ID_TYPE ('display').
251 Args : string when setting. This must be one of values defined in
252 @Bio::SeqIO::fasta::SEQ_ID_TYPES. Allowable values:
253 accession, accession.version, display, primary
254 Throws : fatal exception if the supplied id type is not in @SEQ_ID_TYPES.
255
256 =cut
257
258 sub preferred_id_type {
259 my ($self,$type) = @_;
260 if( defined $type ) {
261 if( ! grep lc($type) eq $_, @SEQ_ID_TYPES) {
262 $self->throw(-class=>'Bio::Root::BadParameter',
263 -text=>"Invalid ID type \"$type\". Must be one of: @SEQ_ID_TYPES");
264 }
265 $self->{'_seq_id_type'} = lc($type);
266 # print STDERR "Setting preferred_id_type=$type\n";
267 }
268 $self->{'_seq_id_type'} || $DEFAULT_SEQ_ID_TYPE;
269 }
270
271 1;