Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/AlignIO/mega.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: mega.pm,v 1.8 2002/10/22 07:45:10 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::AlignIO::mega | |
4 # | |
5 # Cared for by Jason Stajich <jason@bioperl.org> | |
6 # | |
7 # Copyright Jason Stajich | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::AlignIO::mega - Parse and Create MEGA format data files | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::AlignIO; | |
20 my $alignio = new Bio::AlignIO(-format => 'mega', | |
21 -file => 't/data/hemoglobinA.meg'); | |
22 | |
23 while( my $aln = $alignio->next_aln ) { | |
24 # process each alignment or convert to another format like NEXUS | |
25 } | |
26 | |
27 =head1 DESCRIPTION | |
28 | |
29 This object handles reading and writing data streams in the MEGA | |
30 format (Kumar and Nei). | |
31 | |
32 | |
33 =head1 FEEDBACK | |
34 | |
35 | |
36 =head2 Mailing Lists | |
37 | |
38 User feedback is an integral part of the evolution of this and other | |
39 Bioperl modules. Send your comments and suggestions preferably to | |
40 the Bioperl mailing list. Your participation is much appreciated. | |
41 | |
42 bioperl-l@bioperl.org - General discussion | |
43 http://bioperl.org/MailList.shtml - About the mailing lists | |
44 | |
45 =head2 Reporting Bugs | |
46 | |
47 Report bugs to the Bioperl bug tracking system to help us keep track | |
48 of the bugs and their resolution. Bug reports can be submitted via | |
49 email or the web: | |
50 | |
51 bioperl-bugs@bioperl.org | |
52 http://bugzilla.bioperl.org/ | |
53 | |
54 =head1 AUTHOR - Jason Stajich | |
55 | |
56 Email jason@bioperl.org | |
57 | |
58 Describe contact details here | |
59 | |
60 =head1 CONTRIBUTORS | |
61 | |
62 Additional contributors names and emails here | |
63 | |
64 =head1 APPENDIX | |
65 | |
66 The rest of the documentation details each of the object methods. | |
67 Internal methods are usually preceded with a _ | |
68 | |
69 =cut | |
70 | |
71 | |
72 # Let the code begin... | |
73 | |
74 | |
75 package Bio::AlignIO::mega; | |
76 use vars qw(@ISA $MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN); | |
77 use strict; | |
78 | |
79 use Bio::AlignIO; | |
80 use Bio::SimpleAlign; | |
81 use Bio::LocatableSeq; | |
82 | |
83 BEGIN { | |
84 $MEGANAMELEN = 10; | |
85 $LINELEN = 60; | |
86 $BLOCKLEN = 10; | |
87 %VALID_TYPES = map {$_, 1} qw( dna rna protein standard); | |
88 } | |
89 @ISA = qw(Bio::AlignIO ); | |
90 | |
91 | |
92 =head2 next_aln | |
93 | |
94 Title : next_aln | |
95 Usage : $aln = $stream->next_aln() | |
96 Function: returns the next alignment in the stream. | |
97 Supports the following MEGA format features: | |
98 - The file has to start with '#mega' | |
99 - Reads in the name of the alignment from a comment | |
100 (anything after '!TITLE: ') . | |
101 - Reads in the format parameters datatype | |
102 | |
103 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file | |
104 or on error | |
105 Args : NONE | |
106 | |
107 | |
108 =cut | |
109 | |
110 sub next_aln{ | |
111 my ($self) = @_; | |
112 my $entry; | |
113 my ($alphabet,%seqs); | |
114 | |
115 my $aln = Bio::SimpleAlign->new(-source => 'mega'); | |
116 | |
117 while( defined($entry = $self->_readline()) && ($entry =~ /^\s+$/) ) {} | |
118 | |
119 $self->throw("Not a valid MEGA file! [#mega] not starting the file!") | |
120 unless $entry =~ /^#mega/i; | |
121 | |
122 while( defined($entry = $self->_readline() ) ) { | |
123 local($_) = $entry; | |
124 if(/\!Title:\s*([^\;]+)\s*/i) { $aln->id($1)} | |
125 elsif( s/\!Format\s+([^\;]+)\s*/$1/ ) { | |
126 my (@fields) = split(/\s+/,$1); | |
127 foreach my $f ( @fields ) { | |
128 my ($name,$value) = split(/\=/,$f); | |
129 if( $name eq 'datatype' ) { | |
130 $alphabet = $value; | |
131 } elsif( $name eq 'identical' ) { | |
132 $aln->match_char($value); | |
133 } elsif( $name eq 'indel' ) { | |
134 $aln->gap_char($value); | |
135 } | |
136 } | |
137 } elsif( /^\#/ ) { | |
138 last; | |
139 } | |
140 } | |
141 my @order; | |
142 while( defined($entry) ) { | |
143 if( $entry !~ /^\s+$/ ) { | |
144 # this is to skip the leading '#' | |
145 my $seqname = substr($entry,1,$MEGANAMELEN-1); | |
146 $seqname =~ s/(\S+)\s+$/$1/g; | |
147 my $line = substr($entry,$MEGANAMELEN); | |
148 $line =~ s/\s+//g; | |
149 if( ! defined $seqs{$seqname} ) {push @order, $seqname; } | |
150 $seqs{$seqname} .= $line; | |
151 } | |
152 $entry = $self->_readline(); | |
153 } | |
154 | |
155 foreach my $seqname ( @order ) { | |
156 my $s = $seqs{$seqname}; | |
157 $s =~ s/\-//g; | |
158 my $end = length($s); | |
159 my $seq = new Bio::LocatableSeq(-alphabet => $alphabet, | |
160 -id => $seqname, | |
161 -seq => $seqs{$seqname}, | |
162 -start => 1, | |
163 -end => $end); | |
164 | |
165 $aln->add_seq($seq); | |
166 } | |
167 return $aln; | |
168 } | |
169 | |
170 =head2 write_aln | |
171 | |
172 Title : write_aln | |
173 Usage : $stream->write_aln(@aln) | |
174 Function: writes the $aln object into the stream in MEGA format | |
175 Returns : 1 for success and 0 for error | |
176 Args : L<Bio::Align::AlignI> object | |
177 | |
178 =cut | |
179 | |
180 sub write_aln{ | |
181 my ($self,@aln) = @_; | |
182 my $count = 0; | |
183 my $wrapped = 0; | |
184 my $maxname; | |
185 | |
186 foreach my $aln ( @aln ) { | |
187 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) { | |
188 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln"); | |
189 return 0; | |
190 } elsif( ! $aln->is_flush($self->verbose) ) { | |
191 $self->warn("All Sequences in the alignment must be the same length"); | |
192 return 0; | |
193 } | |
194 $aln->match(); | |
195 my $len = $aln->length(); | |
196 my $format = sprintf('datatype=%s identical=%s indel=%s;', | |
197 $aln->get_seq_by_pos(1)->alphabet(), | |
198 $aln->match_char, $aln->gap_char); | |
199 | |
200 $self->_print(sprintf("#mega\n!Title: %s;\n!Format %s\n\n\n", | |
201 $aln->id, $format)); | |
202 | |
203 my ($count, $blockcount,$length) = ( 0,0,$aln->length()); | |
204 $aln->set_displayname_flat(); | |
205 while( $count < $length ) { | |
206 foreach my $seq ( $aln->each_seq ) { | |
207 my $seqchars = $seq->seq(); | |
208 $blockcount = 0; | |
209 my $substring = substr($seqchars, $count, $LINELEN); | |
210 my @blocks; | |
211 while( $blockcount < length($substring) ) { | |
212 push @blocks, substr($substring, $blockcount,$BLOCKLEN); | |
213 $blockcount += $BLOCKLEN; | |
214 } | |
215 $self->_print(sprintf("#%-".($MEGANAMELEN-1)."s%s\n", | |
216 substr($aln->displayname($seq->get_nse()), | |
217 0,$MEGANAMELEN-2), | |
218 join(' ', @blocks))); | |
219 } | |
220 $self->_print("\n"); | |
221 $count += $LINELEN; | |
222 } | |
223 } | |
224 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
225 return 1; | |
226 } | |
227 | |
228 | |
229 1; |