comparison variant_effect_predictor/Bio/AlignIO/phylip.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: phylip.pm,v 1.24.2.1 2003/01/26 15:52:30 jason Exp $
2 #
3 # BioPerl module for Bio::AlignIO::phylip
4 #
5 # Copyright Heikki Lehvaslaiho
6 #
7
8 =head1 NAME
9
10 Bio::AlignIO::phylip - PHYLIP format sequence input/output stream
11
12 =head1 SYNOPSIS
13
14 # Do not use this module directly. Use it via the Bio::AlignIO class.
15
16 use Bio::AlignIO;
17 use Bio::SimpleAlign;
18 #you can set the name length to something other than the default 10
19 #if you use a version of phylip (hacked) that accepts ids > 10
20 my $phylipstream = new Bio::AlignIO(-format => 'phylip',
21 -fh => \*STDOUT,
22 -idlength=>30);
23 # convert data from one format to another
24 my $gcgstream = new Bio::AlignIO(-format => 'msf',
25 -file => 't/data/cysprot1a.msf');
26
27 while( my $aln = $gcgstream->next_aln ) {
28 $phylipstream->write_aln($aln);
29 }
30
31 # do it again with phylip sequential format format
32 $phylipstream->interleaved(0);
33 # can also initialize the object like this
34 $phylipstream = new Bio::AlignIO(-interleaved => 0,
35 -format => 'phylip',
36 -fh => \*STDOUT,
37 -idlength=>10);
38 $gcgstream = new Bio::AlignIO(-format => 'msf',
39 -file => 't/data/cysprot1a.msf');
40
41 while( my $aln = $gcgstream->next_aln ) {
42 $phylipstream->write_aln($aln);
43 }
44
45 =head1 DESCRIPTION
46
47 This object can transform Bio::SimpleAlign objects to and from PHYLIP
48 interleaved format. It will not work with PHYLIP sequencial format.
49
50 This module will output PHYLIP sequential format. By specifying the
51 flag -interleaved =E<gt> 0 in the initialization the module can output
52 data in interleaved format.
53
54 =head1 FEEDBACK
55
56 =head2 Reporting Bugs
57
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 the bugs and their resolution.
60 Bug reports can be submitted via email or the web:
61
62 bioperl-bugs@bio.perl.org
63 http://bugzilla.bioperl.org/
64
65 =head1 AUTHORS - Heikki Lehvaslaiho and Jason Stajich
66
67 Email: heikki@ebi.ac.uk
68 Email: jason@bioperl.org
69
70 =head1 APPENDIX
71
72 The rest of the documentation details each of the object
73 methods. Internal methods are usually preceded with a _
74
75 =cut
76
77 # Let the code begin...
78
79 package Bio::AlignIO::phylip;
80 use vars qw(@ISA $DEFAULTIDLENGTH $DEFAULTLINELEN);
81 use strict;
82
83 use Bio::SimpleAlign;
84 use Bio::AlignIO;
85
86 @ISA = qw(Bio::AlignIO);
87
88 BEGIN {
89 $DEFAULTIDLENGTH = 10;
90 $DEFAULTLINELEN = 60;
91 }
92
93 =head2 new
94
95 Title : new
96 Usage : my $alignio = new Bio::AlignIO(-format => 'phylip'
97 -file => '>file',
98 -idlength => 10,
99 -idlinebreak => 1);
100 Function: Initialize a new L<Bio::AlignIO::phylip> reader or writer
101 Returns : L<Bio::AlignIO> object
102 Args : [specific for writing of phylip format files]
103 -idlength => integer - length of the id (will pad w/
104 spaces if needed)
105 -interleaved => boolean - whether or not write as interleaved
106 or sequential format
107 -linelength => integer of how long a sequence lines should be
108 -idlinebreak => insert a line break after the sequence id
109 so that sequence starts on the next line
110
111 =cut
112
113 sub _initialize {
114 my($self,@args) = @_;
115 $self->SUPER::_initialize(@args);
116
117 my ($interleave,$linelen,$idlinebreak,
118 $idlength) = $self->_rearrange([qw(INTERLEAVED
119 LINELENGTH
120 IDLINEBREAK
121 IDLENGTH)],@args);
122 $self->interleaved(1) if( $interleave || ! defined $interleave);
123 $self->idlength($idlength || $DEFAULTIDLENGTH);
124 $self->id_linebreak(1) if( $idlinebreak );
125 $self->line_length($linelen) if defined $linelen && $linelen > 0;
126 1;
127 }
128
129 =head2 next_aln
130
131 Title : next_aln
132 Usage : $aln = $stream->next_aln()
133 Function: returns the next alignment in the stream.
134 Throws an exception if trying to read in PHYLIP
135 sequential format.
136 Returns : L<Bio::SimpleAlign> object
137 Args :
138
139 =cut
140
141 sub next_aln {
142 my $self = shift;
143 my $entry;
144 my ($seqcount, $residuecount, %hash, $name,$str,
145 @names,$seqname,$start,$end,$count,$seq);
146
147 my $aln = Bio::SimpleAlign->new(-source => 'phylip');
148 $entry = $self->_readline and
149 ($seqcount, $residuecount) = $entry =~ /\s*(\d+)\s+(\d+)/;
150 return 0 unless $seqcount and $residuecount;
151
152 # first alignment section
153 my $idlen = $self->idlength;
154 $count = 0;
155 my $non_interleaved = ! $self->interleaved ;
156 while( $entry = $self->_readline) {
157 last if( $entry =~ /^\s?$/ && ! $non_interleaved );
158
159 if( $entry =~ /^\s+(.+)$/ ) {
160 $str = $1;
161 $non_interleaved = 1;
162 $str =~ s/\s//g;
163 $count = scalar @names;
164 $hash{$count} .= $str;
165 } elsif( $entry =~ /^(.{$idlen})\s+(.*)\s$/ ) {
166 $name = $1;
167 $str = $2;
168 $name =~ s/[\s\/]/_/g;
169 $name =~ s/_+$//; # remove any trailing _'s
170 push @names, $name;
171 $str =~ s/\s//g;
172 $count = scalar @names;
173 $hash{$count} = $str;
174 }
175 $self->throw("Not a valid interleaved PHYLIP file!") if $count > $seqcount;
176 }
177
178 unless( $non_interleaved ) {
179 # interleaved sections
180 $count = 0;
181 while( $entry = $self->_readline) {
182 # finish current entry
183 if($entry =~/\s*\d+\s+\d+/){
184 $self->_pushback($entry);
185 last;
186 }
187 $count = 0, next if $entry =~ /^\s$/;
188
189 $entry =~ /\s*(.*)$/ && do {
190 $str = $1;
191 $str =~ s/\s//g;
192 $count++;
193 $hash{$count} .= $str;
194 };
195 $self->throw("Not a valid interleaved PHYLIP file!") if $count > $seqcount;
196 }
197 }
198 return 0 if scalar @names < 1;
199
200 # sequence creation
201 $count = 0;
202 foreach $name ( @names ) {
203 $count++;
204 if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
205 $seqname = $1;
206 $start = $2;
207 $end = $3;
208 } else {
209 $seqname=$name;
210 $start = 1;
211 $str = $hash{$count};
212 $str =~ s/[^A-Za-z]//g;
213 $end = length($str);
214 }
215 # consistency test
216 $self->throw("Length of sequence [$seqname] is not [$residuecount]! ")
217 unless CORE::length($hash{$count}) == $residuecount;
218
219 $seq = new Bio::LocatableSeq('-seq'=>$hash{$count},
220 '-id'=>$seqname,
221 '-start'=>$start,
222 '-end'=>$end,
223 );
224
225 $aln->add_seq($seq);
226
227 }
228 return $aln;
229 }
230
231
232 =head2 write_aln
233
234 Title : write_aln
235 Usage : $stream->write_aln(@aln)
236 Function: writes the $aln object into the stream in MSF format
237 Returns : 1 for success and 0 for error
238 Args : L<Bio::Align::AlignI> object
239
240 =cut
241
242 sub write_aln {
243 my ($self,@aln) = @_;
244 my $count = 0;
245 my $wrapped = 0;
246 my $maxname;
247 my ($length,$date,$name,$seq,$miss,$pad,
248 %hash,@arr,$tempcount,$index,$idlength);
249
250 foreach my $aln (@aln) {
251 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
252 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
253 next;
254 }
255 $self->throw("All sequences in the alignment must be the same length")
256 unless $aln->is_flush(1) ;
257
258 $aln->set_displayname_flat(); # plain
259 $length = $aln->length();
260 $self->_print (sprintf(" %s %s\n", $aln->no_sequences, $aln->length));
261
262 $idlength = $self->idlength();
263 foreach $seq ( $aln->each_seq() ) {
264 $name = $aln->displayname($seq->get_nse);
265 $name = substr($name, 0, $idlength) if length($name) > $idlength;
266 $name = sprintf("%-".$idlength."s",$name);
267 if( $self->interleaved() ) {
268 $name .= ' ' ;
269 } elsif( $self->id_linebreak) {
270 $name .= "\n";
271 }
272
273 #phylip needs dashes not dots
274 my $seq = $seq->seq();
275 $seq=~s/\./-/g;
276 $hash{$name} = $seq;
277 push(@arr,$name);
278 }
279
280 if( $self->interleaved() ) {
281 while( $count < $length ) {
282
283 # there is another block to go!
284 foreach $name ( @arr ) {
285 my $dispname = $name;
286 $dispname = '' if $wrapped;
287 $self->_print (sprintf("%".($idlength+3)."s",$dispname));
288 $tempcount = $count;
289 $index = 0;
290 while( ($tempcount + $idlength < $length) && ($index < 5) ) {
291 $self->_print (sprintf("%s ",substr($hash{$name},
292 $tempcount,
293 $idlength)));
294 $tempcount += $idlength;
295 $index++;
296 }
297 # last
298 if( $index < 5) {
299 # space to print!
300 $self->_print (sprintf("%s ",substr($hash{$name},
301 $tempcount)));
302 $tempcount += $idlength;
303 }
304 $self->_print ("\n");
305 }
306 $self->_print ("\n");
307 $count = $tempcount;
308 $wrapped = 1;
309 }
310 } else {
311 foreach $name ( @arr ) {
312 my $dispname = $name;
313 $dispname = '' if $wrapped;
314 $self->_print (sprintf("%s%s\n",$dispname,$hash{$name}));
315 }
316 }
317 }
318 $self->flush if $self->_flush_on_write && defined $self->_fh;
319 return 1;
320 }
321
322 =head2 interleaved
323
324 Title : interleaved
325 Usage : my $interleaved = $obj->interleaved
326 Function: Get/Set Interleaved status
327 Returns : boolean
328 Args : boolean
329
330
331 =cut
332
333 sub interleaved{
334 my ($self,$value) = @_;
335 my $previous = $self->{'_interleaved'};
336 if( defined $value ) {
337 $self->{'_interleaved'} = $value;
338 }
339 return $previous;
340 }
341
342 =head2 idlength
343
344 Title : idlength
345 Usage : my $idlength = $obj->interleaved
346 Function: Get/Set value of id length
347 Returns : string
348 Args : string
349
350
351 =cut
352
353 sub idlength {
354 my($self,$value) = @_;
355 if (defined $value){
356 $self->{'_idlength'} = $value;
357 }
358 return $self->{'_idlength'};
359 }
360
361 =head2 line_length
362
363 Title : line_length
364 Usage : $obj->line_length($newval)
365 Function:
366 Returns : value of line_length
367 Args : newvalue (optional)
368
369
370 =cut
371
372 sub line_length{
373 my ($self,$value) = @_;
374 if( defined $value) {
375 $self->{'line_length'} = $value;
376 }
377 return $self->{'line_length'} || $DEFAULTLINELEN;
378
379 }
380
381 =head2 id_linebreak
382
383 Title : id_linebreak
384 Usage : $obj->id_linebreak($newval)
385 Function:
386 Returns : value of id_linebreak
387 Args : newvalue (optional)
388
389
390 =cut
391
392 sub id_linebreak{
393 my ($self,$value) = @_;
394 if( defined $value) {
395 $self->{'_id_linebreak'} = $value;
396 }
397 return $self->{'_id_linebreak'} || 0;
398 }
399
400 1;