Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/AlignIO/stockholm.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
line wrap: on
line diff
# $Id: stockholm.pm,v 1.10.2.1 2003/03/14 09:14:59 heikki Exp $
#
# BioPerl module for Bio::AlignIO::stockholm

#       based on the Bio::SeqIO::stockholm module
#       by Ewan Birney <birney@sanger.ac.uk>
#       and Lincoln Stein <lstein@cshl.org>
#
#       and the SimpleAlign.pm module of Ewan Birney
#
# Copyright Peter Schattner
#
# You may distribute this module under the same terms as perl itself
# _history
# September 5, 2000
# POD documentation - main docs before the code

=head1 NAME

Bio::AlignIO::stockholm - stockholm sequence input/output stream

=head1 SYNOPSIS

Do not use this module directly.  Use it via the L<Bio::AlignIO> class.

=head1 DESCRIPTION

This object can transform L<Bio::Align::AlignI> objects to and from
stockholm flat file databases.

Only reading is implemented; C<write_aln> throws an exception.

=head1 FEEDBACK

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution.  Bug reports can be submitted via
email or the web:

  bioperl-bugs@bio.perl.org
  http://bugzilla.bioperl.org/

=head1 AUTHORS - Peter Schattner

Email: schattner@alum.mit.edu

=head1 CONTRIBUTORS

Andreas Kahari, ak@ebi.ac.uk

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::AlignIO::stockholm;
use vars qw(@ISA);
use strict;

use Bio::AlignIO;

@ISA = qw(Bio::AlignIO);

=head2 next_aln

 Title   : next_aln
 Usage   : $aln = $stream->next_aln()
 Function: returns the next alignment in the stream.
 Returns : L<Bio::Align::AlignI> object, or undef when no sequence
           could be read (e.g. at the end of the stream)
 Args    : NONE

=cut

sub next_aln {
    my $self = shift;
    my $entry;

    # %align     : sequence name => concatenated aligned sequence string
    # @names     : sequence names in order of first appearance
    # %accession : sequence name => accession number (from '#=GS' lines)
    my (%align, @names, %accession, $name);

    # In stockholm format, every non-blank line that does not start
    # with '#=' is an alignment segment; the '#=' lines are mark up lines.
    # Of particular interest are the '#=GS <name/st-ed> AC <accession>'
    # lines, which give accession numbers for each segment, and the
    # '#=GF AC <accession>' line, which is used as the alignment id.

    my $aln = Bio::SimpleAlign->new(-source => 'stockholm');

    # Skip leading blank lines; the first non-blank line must be the
    # "# STOCKHOLM" header.
    while ( defined($entry = $self->_readline) ) {
        $entry !~ /\w+/ && next;

        if ($entry =~ /^#\s*STOCKHOLM\s+/) {
            last;
        }
        else {
            # Report the line that was actually read (held in $entry, not
            # $_), without its trailing whitespace/newline.
            (my $found = $entry) =~ s/\s+$//;
            $self->throw("Not Stockholm format: Expecting \"# STOCKHOLM 1.0\"; Found \"$found\"");
        }
    }

    #
    # Next section is same as for selex format
    #
    while ( defined($entry = $self->_readline) ) {
        # Double slash (//) signals end of file.  The flat Pfam-A data from
        # ftp://ftp.sanger.ac.uk/pub/databases/Pfam/Pfam-A.full.gz consists
        # of several concatenated Stockholm-formatted files.  The following
        # line makes it possible to parse it without this module trying to
        # read the whole file into memory.  Andreas Kähäri 10/3/2003.
        last if $entry =~ /^\/\//;

        # Extra bonus:  Get the name of the alignment.
        # Andreas Kähäri 10/3/2003.
        if ($entry =~ /^#=GF\s+AC\s+(\S+)/) {
            $aln->id($1);
            next;
        }

        # Per-sequence accession number.
        if ($entry =~ /^#=GS\s+(\S+)\s+AC\s+(\S+)/) {
            $accession{$1} = $2;
            next;
        }

        # A line holding only sequence characters continues the most
        # recently named sequence.  Without a preceding named line there
        # is nothing to attach it to, so it falls through and is skipped
        # by the test below.
        if (defined $name && $entry =~ /^([A-Za-z.-]+)$/) {
            $align{$name} .= $1;
            next;
        }

        # Anything else must look like "<name> <sequence>"; other mark up
        # and unrecognised lines are ignored.
        next unless $entry =~ /^([^#]\S+)\s+([A-Za-z.-]+)\s*/;

        $name = $1;
        my $segment = $2;

        # Remember the order in which sequence names first appear.
        push @names, $name unless defined $align{$name};
        $align{$name} .= $segment;
    }

    # ok... now we can make the sequences

    my $end;
    foreach my $id (@names) {
        my ($seqname, $start);

        if ( $id =~ /(\S+)\/(\d+)-(\d+)/ ) {
            # Name carries explicit coordinates: <seqname>/<start>-<end>
            $seqname = $1;
            $start   = $2;
            $end     = $3;
        }
        else {
            $seqname = $id;
            $start   = 1;
            $end     = length($align{$id});
        }

        my $seq = Bio::LocatableSeq->new('-seq'              => $align{$id},
                                         '-id'               => $seqname,
                                         '-start'            => $start,
                                         '-end'              => $end,
                                         '-type'             => 'aligned',
                                         '-accession_number' => $accession{$id},
                                        );

        $aln->add_seq($seq);
    }

    # If no sequence was read ($end never set) or the last sequence ended
    # at a position <= 0, we have either reached the end of file in <fh>
    # or we have encountered some other error.
    if ( !defined $end || $end <= 0 ) { undef $aln; }

    return $aln;
}


=head2 write_aln

 Title   : write_aln
 Usage   : $stream->write_aln(@aln)
 Function: writes the $aln object into the stream in stockholm format
           ###Not yet implemented!###  Currently always throws an
           exception.
 Returns : 1 for success and 0 for error
 Args    : L<Bio::Align::AlignI> object


=cut

sub write_aln {
    my ($self,@aln) = @_;

    $self->throw("Sorry: stockholm-format output, not yet implemented!\n");
}

1;