comparison variant_effect_predictor/Bio/AlignIO/mega.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: mega.pm,v 1.8 2002/10/22 07:45:10 lapp Exp $
2 #
3 # BioPerl module for Bio::AlignIO::mega
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::AlignIO::mega - Parse and Create MEGA format data files
16
17 =head1 SYNOPSIS
18
19 use Bio::AlignIO;
20 my $alignio = new Bio::AlignIO(-format => 'mega',
21 -file => 't/data/hemoglobinA.meg');
22
23 while( my $aln = $alignio->next_aln ) {
24 # process each alignment or convert to another format like NEXUS
25 }
26
27 =head1 DESCRIPTION
28
29 This object handles reading and writing data streams in the MEGA
30 format (Kumar and Nei).
31
32
33 =head1 FEEDBACK
34
35
36 =head2 Mailing Lists
37
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
41
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/MailList.shtml - About the mailing lists
44
45 =head2 Reporting Bugs
46
47 Report bugs to the Bioperl bug tracking system to help us keep track
48 of the bugs and their resolution. Bug reports can be submitted via
49 email or the web:
50
51 bioperl-bugs@bioperl.org
52 http://bugzilla.bioperl.org/
53
54 =head1 AUTHOR - Jason Stajich
55
56 Email jason@bioperl.org
57
58 Describe contact details here
59
60 =head1 CONTRIBUTORS
61
62 Additional contributors names and emails here
63
64 =head1 APPENDIX
65
66 The rest of the documentation details each of the object methods.
67 Internal methods are usually preceded with a _
68
69 =cut
70
71
72 # Let the code begin...
73
74
75 package Bio::AlignIO::mega;
76 use vars qw(@ISA $MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN);
77 use strict;
78
79 use Bio::AlignIO;
80 use Bio::SimpleAlign;
81 use Bio::LocatableSeq;
82
83 BEGIN {
84 $MEGANAMELEN = 10;
85 $LINELEN = 60;
86 $BLOCKLEN = 10;
87 %VALID_TYPES = map {$_, 1} qw( dna rna protein standard);
88 }
89 @ISA = qw(Bio::AlignIO );
90
91
92 =head2 next_aln
93
94 Title : next_aln
95 Usage : $aln = $stream->next_aln()
96 Function: returns the next alignment in the stream.
97 Supports the following MEGA format features:
98 - The file has to start with '#mega'
99 - Reads in the name of the alignment from a comment
100 (anything after '!TITLE: ') .
101 - Reads in the format parameters datatype
102
103 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
104 or on error
105 Args : NONE
106
107
108 =cut
109
110 sub next_aln{
111 my ($self) = @_;
112 my $entry;
113 my ($alphabet,%seqs);
114
115 my $aln = Bio::SimpleAlign->new(-source => 'mega');
116
117 while( defined($entry = $self->_readline()) && ($entry =~ /^\s+$/) ) {}
118
119 $self->throw("Not a valid MEGA file! [#mega] not starting the file!")
120 unless $entry =~ /^#mega/i;
121
122 while( defined($entry = $self->_readline() ) ) {
123 local($_) = $entry;
124 if(/\!Title:\s*([^\;]+)\s*/i) { $aln->id($1)}
125 elsif( s/\!Format\s+([^\;]+)\s*/$1/ ) {
126 my (@fields) = split(/\s+/,$1);
127 foreach my $f ( @fields ) {
128 my ($name,$value) = split(/\=/,$f);
129 if( $name eq 'datatype' ) {
130 $alphabet = $value;
131 } elsif( $name eq 'identical' ) {
132 $aln->match_char($value);
133 } elsif( $name eq 'indel' ) {
134 $aln->gap_char($value);
135 }
136 }
137 } elsif( /^\#/ ) {
138 last;
139 }
140 }
141 my @order;
142 while( defined($entry) ) {
143 if( $entry !~ /^\s+$/ ) {
144 # this is to skip the leading '#'
145 my $seqname = substr($entry,1,$MEGANAMELEN-1);
146 $seqname =~ s/(\S+)\s+$/$1/g;
147 my $line = substr($entry,$MEGANAMELEN);
148 $line =~ s/\s+//g;
149 if( ! defined $seqs{$seqname} ) {push @order, $seqname; }
150 $seqs{$seqname} .= $line;
151 }
152 $entry = $self->_readline();
153 }
154
155 foreach my $seqname ( @order ) {
156 my $s = $seqs{$seqname};
157 $s =~ s/\-//g;
158 my $end = length($s);
159 my $seq = new Bio::LocatableSeq(-alphabet => $alphabet,
160 -id => $seqname,
161 -seq => $seqs{$seqname},
162 -start => 1,
163 -end => $end);
164
165 $aln->add_seq($seq);
166 }
167 return $aln;
168 }
169
170 =head2 write_aln
171
172 Title : write_aln
173 Usage : $stream->write_aln(@aln)
174 Function: writes the $aln object into the stream in MEGA format
175 Returns : 1 for success and 0 for error
176 Args : L<Bio::Align::AlignI> object
177
178 =cut
179
180 sub write_aln{
181 my ($self,@aln) = @_;
182 my $count = 0;
183 my $wrapped = 0;
184 my $maxname;
185
186 foreach my $aln ( @aln ) {
187 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
188 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
189 return 0;
190 } elsif( ! $aln->is_flush($self->verbose) ) {
191 $self->warn("All Sequences in the alignment must be the same length");
192 return 0;
193 }
194 $aln->match();
195 my $len = $aln->length();
196 my $format = sprintf('datatype=%s identical=%s indel=%s;',
197 $aln->get_seq_by_pos(1)->alphabet(),
198 $aln->match_char, $aln->gap_char);
199
200 $self->_print(sprintf("#mega\n!Title: %s;\n!Format %s\n\n\n",
201 $aln->id, $format));
202
203 my ($count, $blockcount,$length) = ( 0,0,$aln->length());
204 $aln->set_displayname_flat();
205 while( $count < $length ) {
206 foreach my $seq ( $aln->each_seq ) {
207 my $seqchars = $seq->seq();
208 $blockcount = 0;
209 my $substring = substr($seqchars, $count, $LINELEN);
210 my @blocks;
211 while( $blockcount < length($substring) ) {
212 push @blocks, substr($substring, $blockcount,$BLOCKLEN);
213 $blockcount += $BLOCKLEN;
214 }
215 $self->_print(sprintf("#%-".($MEGANAMELEN-1)."s%s\n",
216 substr($aln->displayname($seq->get_nse()),
217 0,$MEGANAMELEN-2),
218 join(' ', @blocks)));
219 }
220 $self->_print("\n");
221 $count += $LINELEN;
222 }
223 }
224 $self->flush if $self->_flush_on_write && defined $self->_fh;
225 return 1;
226 }
227
228
229 1;