0
|
1 # $Id: fasta.pm,v 1.11 2002/12/14 19:09:05 birney Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::AlignIO::fasta
|
|
4
|
|
5 # based on the Bio::SeqIO::fasta module
|
|
6 # by Ewan Birney <birney@sanger.ac.uk>
|
|
7 # and Lincoln Stein <lstein@cshl.org>
|
|
8 #
|
|
9 # and the SimpleAlign.pm module of Ewan Birney
|
|
10 #
|
|
11 # Copyright Peter Schattner
|
|
12 #
|
|
13 # You may distribute this module under the same terms as perl itself
|
|
14 # _history
|
|
15 # September 5, 2000
|
|
16 # POD documentation - main docs before the code
|
|
17
|
|
18 =head1 NAME
|
|
19
|
|
20 Bio::AlignIO::fasta - FastA MSA Sequence input/output stream
|
|
21
|
|
22 =head1 SYNOPSIS
|
|
23
|
|
24 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
|
|
25
|
|
26 =head1 DESCRIPTION
|
|
27
|
|
28 This object can transform L<Bio::SimpleAlign> objects to and from
|
|
29 fasta flat file databases. This is for the fasta sequence format NOT
|
|
30 FastA analysis program. To process the pairwise alignments from a
|
|
31 FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
|
|
32
|
|
33 =head1 FEEDBACK
|
|
34
|
|
35 =head2 Reporting Bugs
|
|
36
|
|
37 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
38 the bugs and their resolution. Bug reports can be submitted via email
|
|
39 or the web:
|
|
40
|
|
41 bioperl-bugs@bio.perl.org
|
|
42 http://bugzilla.bioperl.org/
|
|
43
|
|
44 =head1 AUTHORS - Peter Schattner
|
|
45
|
|
46 Email: schattner@alum.mit.edu
|
|
47
|
|
48
|
|
49 =head1 APPENDIX
|
|
50
|
|
51 The rest of the documentation details each of the object
|
|
52 methods. Internal methods are usually preceded with a _
|
|
53
|
|
54 =cut
|
|
55
|
|
56 # Let the code begin...
|
|
57
|
|
58 package Bio::AlignIO::fasta;
|
|
59 use vars qw(@ISA);
|
|
60 use strict;
|
|
61
|
|
62 use Bio::AlignIO;
|
|
63 use Bio::SimpleAlign;
|
|
64
|
|
65 @ISA = qw(Bio::AlignIO);
|
|
66
|
|
67
|
|
68 =head2 next_aln
|
|
69
|
|
70 Title : next_aln
|
|
71 Usage : $aln = $stream->next_aln()
|
|
72 Function: returns the next alignment in the stream.
|
|
73 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
|
|
74 or on error
|
|
75 Args : NONE
|
|
76
|
|
77 =cut
|
|
78
|
|
79 sub next_aln {
|
|
80 my $self = shift;
|
|
81 my $entry;
|
|
82 my ($start,$end,$name,$seqname,$seq,$seqchar,$tempname,%align);
|
|
83 my $aln = Bio::SimpleAlign->new();
|
|
84
|
|
85 while(defined ($entry = $self->_readline)) {
|
|
86 if($entry =~ /^>(\S+)/ ) {
|
|
87 $tempname = $1;
|
|
88 if( defined $name ) {
|
|
89 # put away last name and sequence
|
|
90
|
|
91 if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
|
|
92 $seqname = $1;
|
|
93 $start = $2;
|
|
94 $end = $3;
|
|
95 } else {
|
|
96 $seqname=$name;
|
|
97 $start = 1;
|
|
98 $end = length($seqchar); #ps 9/6/00
|
|
99 }
|
|
100 # print STDERR "Going to add with $seqchar $seqname\n";
|
|
101 $seq = new Bio::LocatableSeq('-seq'=>$seqchar,
|
|
102 '-id'=>$seqname,
|
|
103 '-start'=>$start,
|
|
104 '-end'=>$end,
|
|
105 );
|
|
106
|
|
107 $aln->add_seq($seq);
|
|
108 }
|
|
109 $name = $tempname;
|
|
110 $seqchar = "";
|
|
111 next;
|
|
112 }
|
|
113 $entry =~ s/[^A-Za-z\.\-]//g;
|
|
114 $seqchar .= $entry;
|
|
115
|
|
116 }
|
|
117 #
|
|
118 # Next two lines are to silence warnings that
|
|
119 # otherwise occur at EOF when using <$fh>
|
|
120
|
|
121 if (!defined $name) {$name="";}
|
|
122 if (!defined $seqchar) {$seqchar="";}
|
|
123
|
|
124 # Put away last name and sequence
|
|
125 if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
|
|
126 $seqname = $1;
|
|
127 $start = $2;
|
|
128 $end = $3;
|
|
129 } else {
|
|
130 $seqname=$name;
|
|
131 $start = 1;
|
|
132 $end = length($seqchar); #ps 9/6/00
|
|
133 # $end = length($align{$name});
|
|
134 }
|
|
135
|
|
136
|
|
137 # If $end <= 0, we have either reached the end of
|
|
138 # file in <> or we have encountered some other error
|
|
139 #
|
|
140 if ($end <= 0) { undef $aln; return $aln;}
|
|
141
|
|
142 # This logic now also reads empty lines at the
|
|
143 # end of the file. Skip this is seqchar and seqname is null
|
|
144 if( length($seqchar) == 0 && length($seqname) == 0 ) {
|
|
145 # skip
|
|
146 } else {
|
|
147 # print STDERR "end to add with $seqchar $seqname\n";
|
|
148 $seq = new Bio::LocatableSeq('-seq'=>$seqchar,
|
|
149 '-id'=>$seqname,
|
|
150 '-start'=>$start,
|
|
151 '-end'=>$end,
|
|
152 );
|
|
153
|
|
154 $aln->add_seq($seq);
|
|
155 }
|
|
156
|
|
157 return $aln;
|
|
158
|
|
159 }
|
|
160
|
|
161
|
|
162 =head2 write_aln
|
|
163
|
|
164 Title : write_aln
|
|
165 Usage : $stream->write_aln(@aln)
|
|
166 Function: writes the $aln object into the stream in fasta format
|
|
167 Returns : 1 for success and 0 for error
|
|
168 Args : L<Bio::Align::AlignI> object
|
|
169
|
|
170
|
|
171 =cut
|
|
172
|
|
173 sub write_aln {
|
|
174 my ($self,@aln) = @_;
|
|
175 my ($seq,$rseq,$name,$count,$length,$seqsub);
|
|
176
|
|
177 foreach my $aln (@aln) {
|
|
178 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
|
|
179 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
|
|
180 next;
|
|
181 }
|
|
182 foreach $rseq ( $aln->each_seq() ) {
|
|
183 $name = $aln->displayname($rseq->get_nse());
|
|
184 $seq = $rseq->seq();
|
|
185 $self->_print (">$name\n") or return ;
|
|
186 $count =0;
|
|
187 $length = length($seq);
|
|
188 while( ($count * 60 ) < $length ) {
|
|
189 $seqsub = substr($seq,$count*60,60);
|
|
190 $self->_print ("$seqsub\n") or return ;
|
|
191 $count++;
|
|
192 }
|
|
193 }
|
|
194 }
|
|
195 $self->flush if $self->_flush_on_write && defined $self->_fh;
|
|
196 return 1;
|
|
197 }
|
|
198
|
|
199 1;
|