Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/AlignIO/mega.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: mega.pm,v 1.8 2002/10/22 07:45:10 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::AlignIO::mega | |
| 4 # | |
| 5 # Cared for by Jason Stajich <jason@bioperl.org> | |
| 6 # | |
| 7 # Copyright Jason Stajich | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::AlignIO::mega - Parse and Create MEGA format data files | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::AlignIO; | |
| 20 my $alignio = new Bio::AlignIO(-format => 'mega', | |
| 21 -file => 't/data/hemoglobinA.meg'); | |
| 22 | |
| 23 while( my $aln = $alignio->next_aln ) { | |
| 24 # process each alignment or convert to another format like NEXUS | |
| 25 } | |
| 26 | |
| 27 =head1 DESCRIPTION | |
| 28 | |
| 29 This object handles reading and writing data streams in the MEGA | |
| 30 format (Kumar and Nei). | |
| 31 | |
| 32 | |
| 33 =head1 FEEDBACK | |
| 34 | |
| 35 | |
| 36 =head2 Mailing Lists | |
| 37 | |
| 38 User feedback is an integral part of the evolution of this and other | |
| 39 Bioperl modules. Send your comments and suggestions preferably to | |
| 40 the Bioperl mailing list. Your participation is much appreciated. | |
| 41 | |
| 42 bioperl-l@bioperl.org - General discussion | |
| 43 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 44 | |
| 45 =head2 Reporting Bugs | |
| 46 | |
| 47 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 48 of the bugs and their resolution. Bug reports can be submitted via | |
| 49 email or the web: | |
| 50 | |
| 51 bioperl-bugs@bioperl.org | |
| 52 http://bugzilla.bioperl.org/ | |
| 53 | |
| 54 =head1 AUTHOR - Jason Stajich | |
| 55 | |
| 56 Email jason@bioperl.org | |
| 57 | |
| 58 Describe contact details here | |
| 59 | |
| 60 =head1 CONTRIBUTORS | |
| 61 | |
| 62 Additional contributors names and emails here | |
| 63 | |
| 64 =head1 APPENDIX | |
| 65 | |
| 66 The rest of the documentation details each of the object methods. | |
| 67 Internal methods are usually preceded with a _ | |
| 68 | |
| 69 =cut | |
| 70 | |
| 71 | |
| 72 # Let the code begin... | |
| 73 | |
| 74 | |
| 75 package Bio::AlignIO::mega; | |
| 76 use vars qw(@ISA $MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN); | |
| 77 use strict; | |
| 78 | |
| 79 use Bio::AlignIO; | |
| 80 use Bio::SimpleAlign; | |
| 81 use Bio::LocatableSeq; | |
| 82 | |
| 83 BEGIN { | |
| 84 $MEGANAMELEN = 10; | |
| 85 $LINELEN = 60; | |
| 86 $BLOCKLEN = 10; | |
| 87 %VALID_TYPES = map {$_, 1} qw( dna rna protein standard); | |
| 88 } | |
| 89 @ISA = qw(Bio::AlignIO ); | |
| 90 | |
| 91 | |
| 92 =head2 next_aln | |
| 93 | |
| 94 Title : next_aln | |
| 95 Usage : $aln = $stream->next_aln() | |
| 96 Function: returns the next alignment in the stream. | |
| 97 Supports the following MEGA format features: | |
| 98 - The file has to start with '#mega' | |
| 99 - Reads in the name of the alignment from a comment | |
| 100 (anything after '!TITLE: ') . | |
| 101 - Reads in the format parameters datatype | |
| 102 | |
| 103 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file | |
| 104 or on error | |
| 105 Args : NONE | |
| 106 | |
| 107 | |
| 108 =cut | |
| 109 | |
| 110 sub next_aln{ | |
| 111 my ($self) = @_; | |
| 112 my $entry; | |
| 113 my ($alphabet,%seqs); | |
| 114 | |
| 115 my $aln = Bio::SimpleAlign->new(-source => 'mega'); | |
| 116 | |
| 117 while( defined($entry = $self->_readline()) && ($entry =~ /^\s+$/) ) {} | |
| 118 | |
| 119 $self->throw("Not a valid MEGA file! [#mega] not starting the file!") | |
| 120 unless $entry =~ /^#mega/i; | |
| 121 | |
| 122 while( defined($entry = $self->_readline() ) ) { | |
| 123 local($_) = $entry; | |
| 124 if(/\!Title:\s*([^\;]+)\s*/i) { $aln->id($1)} | |
| 125 elsif( s/\!Format\s+([^\;]+)\s*/$1/ ) { | |
| 126 my (@fields) = split(/\s+/,$1); | |
| 127 foreach my $f ( @fields ) { | |
| 128 my ($name,$value) = split(/\=/,$f); | |
| 129 if( $name eq 'datatype' ) { | |
| 130 $alphabet = $value; | |
| 131 } elsif( $name eq 'identical' ) { | |
| 132 $aln->match_char($value); | |
| 133 } elsif( $name eq 'indel' ) { | |
| 134 $aln->gap_char($value); | |
| 135 } | |
| 136 } | |
| 137 } elsif( /^\#/ ) { | |
| 138 last; | |
| 139 } | |
| 140 } | |
| 141 my @order; | |
| 142 while( defined($entry) ) { | |
| 143 if( $entry !~ /^\s+$/ ) { | |
| 144 # this is to skip the leading '#' | |
| 145 my $seqname = substr($entry,1,$MEGANAMELEN-1); | |
| 146 $seqname =~ s/(\S+)\s+$/$1/g; | |
| 147 my $line = substr($entry,$MEGANAMELEN); | |
| 148 $line =~ s/\s+//g; | |
| 149 if( ! defined $seqs{$seqname} ) {push @order, $seqname; } | |
| 150 $seqs{$seqname} .= $line; | |
| 151 } | |
| 152 $entry = $self->_readline(); | |
| 153 } | |
| 154 | |
| 155 foreach my $seqname ( @order ) { | |
| 156 my $s = $seqs{$seqname}; | |
| 157 $s =~ s/\-//g; | |
| 158 my $end = length($s); | |
| 159 my $seq = new Bio::LocatableSeq(-alphabet => $alphabet, | |
| 160 -id => $seqname, | |
| 161 -seq => $seqs{$seqname}, | |
| 162 -start => 1, | |
| 163 -end => $end); | |
| 164 | |
| 165 $aln->add_seq($seq); | |
| 166 } | |
| 167 return $aln; | |
| 168 } | |
| 169 | |
| 170 =head2 write_aln | |
| 171 | |
| 172 Title : write_aln | |
| 173 Usage : $stream->write_aln(@aln) | |
| 174 Function: writes the $aln object into the stream in MEGA format | |
| 175 Returns : 1 for success and 0 for error | |
| 176 Args : L<Bio::Align::AlignI> object | |
| 177 | |
| 178 =cut | |
| 179 | |
| 180 sub write_aln{ | |
| 181 my ($self,@aln) = @_; | |
| 182 my $count = 0; | |
| 183 my $wrapped = 0; | |
| 184 my $maxname; | |
| 185 | |
| 186 foreach my $aln ( @aln ) { | |
| 187 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) { | |
| 188 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln"); | |
| 189 return 0; | |
| 190 } elsif( ! $aln->is_flush($self->verbose) ) { | |
| 191 $self->warn("All Sequences in the alignment must be the same length"); | |
| 192 return 0; | |
| 193 } | |
| 194 $aln->match(); | |
| 195 my $len = $aln->length(); | |
| 196 my $format = sprintf('datatype=%s identical=%s indel=%s;', | |
| 197 $aln->get_seq_by_pos(1)->alphabet(), | |
| 198 $aln->match_char, $aln->gap_char); | |
| 199 | |
| 200 $self->_print(sprintf("#mega\n!Title: %s;\n!Format %s\n\n\n", | |
| 201 $aln->id, $format)); | |
| 202 | |
| 203 my ($count, $blockcount,$length) = ( 0,0,$aln->length()); | |
| 204 $aln->set_displayname_flat(); | |
| 205 while( $count < $length ) { | |
| 206 foreach my $seq ( $aln->each_seq ) { | |
| 207 my $seqchars = $seq->seq(); | |
| 208 $blockcount = 0; | |
| 209 my $substring = substr($seqchars, $count, $LINELEN); | |
| 210 my @blocks; | |
| 211 while( $blockcount < length($substring) ) { | |
| 212 push @blocks, substr($substring, $blockcount,$BLOCKLEN); | |
| 213 $blockcount += $BLOCKLEN; | |
| 214 } | |
| 215 $self->_print(sprintf("#%-".($MEGANAMELEN-1)."s%s\n", | |
| 216 substr($aln->displayname($seq->get_nse()), | |
| 217 0,$MEGANAMELEN-2), | |
| 218 join(' ', @blocks))); | |
| 219 } | |
| 220 $self->_print("\n"); | |
| 221 $count += $LINELEN; | |
| 222 } | |
| 223 } | |
| 224 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
| 225 return 1; | |
| 226 } | |
| 227 | |
| 228 | |
| 229 1; |
