Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/AlignIO/meme.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/AlignIO/meme.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,215 @@ +# $id $ +# +# BioPerl module for Bio::AlignIO::meme +# based on the Bio::SeqIO modules +# by Ewan Birney <birney@sanger.ac.uk> +# and Lincoln Stein <lstein@cshl.org> +# +# and the SimpleAlign.pm module of Ewan Birney +# +# Copyright Benjamin Berman +# +# You may distribute this module under the same terms as perl itself +# _history + +=head1 NAME + +Bio::AlignIO::meme - meme sequence input/output stream + +=head1 SYNOPSIS + +Do not use this module directly. Use it via the Bio::AlignIO class. + +=head1 DESCRIPTION + +This object transforms the "sites sorted by p-value" sections of a meme +(text) output file into a series of Bio::SimpleAlign objects. Each +SimpleAlign object contains Bio::LocatableSeq objects which represent the +individual aligned sites as defined by the central portion of the "site" +field in the meme file. The start and end coordinates are derived from +the "Start" field. See L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for +more information. + +This module can only parse MEME version 3.0 and greater. Previous versions +have output formats that are more difficult to parse correctly. If the meme +output file is not version 3.0 or greater, we signal an error. + +=head1 FEEDBACK + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +the bugs and their resolution. +Bug reports can be submitted via email or the web: + + bioperl-bugs@bio.perl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHORS - Benjamin Berman + + (based on the Bio::SeqIO modules by Ewan Birney and others) + Email: benb@fruitfly.berkeley.edu + + +=head1 APPENDIX + +The rest of the documentation details each of the object +methods. Internal methods are usually preceded with an +underscore. + +=cut + +# Let the code begin... + +package Bio::AlignIO::meme; +use vars qw(@ISA); +use strict; + +use Bio::AlignIO; +use Bio::LocatableSeq; + +@ISA = qw(Bio::AlignIO); + +# Constants +my $MEME_VERS_ERR = "MEME output file must be generated by version 3.0 or higher"; +my $MEME_NO_HEADER_ERR = "MEME output file contains no header line (ex: MEME version 3.0)"; +my $HTML_VERS_ERR = "MEME output file must be generated with the -text option"; + +=head2 next_aln + + Title : next_aln + Usage : $aln = $stream->next_aln() + Function: returns the next alignment in the stream + Returns : Bio::SimpleAlign object + Args : NONE + +=cut + +sub next_aln { + my ($self) = @_; + my $aln = Bio::SimpleAlign->new(-source => 'meme'); + my $line; + my $good_align_sec = 0; + my $in_align_sec = 0; + while (!$good_align_sec && defined($line = $self->_readline())) + { + if (!$in_align_sec) + { + # Check for the meme header + if ($line =~ /^\s*[Mm][Ee][Mm][Ee]\s+version\s+((?:\d+)?\.\d+)/) + { + $self->{'meme_vers'} = $1; + $self->throw($MEME_VERS_ERR) unless ($self->{'meme_vers'} >= 3.0); + $self->{'seen_header'} = 1; + } + + # Check if they've output the HTML version + if ($line =~ /\<[Tt][Ii][Tt][Ll][Ee]\>/) + { + $self->throw($HTML_VERS_ERR); + } + + # Check if we're going into an alignment section + if ($line =~ /sites sorted by position/) # meme vers > 3.0 + { + $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'}); + $in_align_sec = 1; + } + } + elsif ($line =~ /^\s*(\S+)\s+([+-])\s+(\d+)\s+(\S+)\s+([\.ACTGactg]*) ([ACTGactg]+) ([\.ACTGactg]*)/) + { + # Got a sequence line + my $seq_name = $1; + my $strand = ($2 eq '+') ? 1 : -1; + my $start_pos = $3; + # my $p_val = $4; + # my $left_flank = uc($5); + my $central = uc($6); + # my $right_flank = uc($7); + + # Info about the sequence + my $seq_res = $central; + my $seq_len = length($seq_res); + + # Info about the flanking sequence + # my $left_len = length($left_flank); + # my $right_len = length($right_flank); + # my $start_len = ($strand > 0) ? $left_len : $right_len; + # my $end_len = ($strand > 0) ? $right_len : $left_len; + + # Make the sequence. Meme gives the start coordinate at the left + # hand side of the motif relative to the INPUT sequence. + my $start_coord = $start_pos; + my $end_coord = $start_coord + $seq_len - 1; + my $seq = new Bio::LocatableSeq('-seq'=>$seq_res, + '-id'=>$seq_name, + '-start'=>$start_coord, + '-end'=>$end_coord, + '-strand'=>$strand); + + # Make a seq_feature out of the motif + $aln->add_seq($seq); + } + elsif (($line =~ /^\-/) || ($line =~ /Sequence name/)) + { + # These are acceptable things to be in the site section + } + elsif ($line =~ /^\s*$/) + { + # This ends the site section + $in_align_sec = 0; + $good_align_sec = 1; + } + else + { + $self->warn("Unrecognized format:\n$line"); + return 0; + } + } + + # Signal an error if we didn't find a header section + $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'}); + + return (($good_align_sec) ? $aln : 0); +} + + + +=head2 write_aln + + Title : write_aln + Usage : $stream->write_aln(@aln) + Function: Not implemented + Returns : 1 for success and 0 for error + Args : Bio::SimpleAlign object + +=cut + +sub write_aln { + my ($self,@aln) = @_; + + # Don't handle it yet. + $self->throw("AlignIO::meme::write_aln not implemented"); + return 0; +} + + + +# ---------------------------------------- +# - Private methods +# ---------------------------------------- + + + +sub _initialize { + my($self,@args) = @_; + + # Call into our base version + $self->SUPER::_initialize(@args); + + # Then initialize our data variables + $self->{'seen_header'} = 0; +} + + +1;