comparison variant_effect_predictor/Bio/AlignIO/meme.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $id $
2 #
3 # BioPerl module for Bio::AlignIO::meme
4 # based on the Bio::SeqIO modules
5 # by Ewan Birney <birney@sanger.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
7 #
8 # and the SimpleAlign.pm module of Ewan Birney
9 #
10 # Copyright Benjamin Berman
11 #
12 # You may distribute this module under the same terms as perl itself
13 # _history
14
15 =head1 NAME
16
17 Bio::AlignIO::meme - meme sequence input/output stream
18
19 =head1 SYNOPSIS
20
21 Do not use this module directly. Use it via the Bio::AlignIO class.
22
23 =head1 DESCRIPTION
24
25 This object transforms the "sites sorted by p-value" sections of a meme
26 (text) output file into a series of Bio::SimpleAlign objects. Each
27 SimpleAlign object contains Bio::LocatableSeq objects which represent the
28 individual aligned sites as defined by the central portion of the "site"
29 field in the meme file. The start and end coordinates are derived from
30 the "Start" field. See L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for
31 more information.
32
33 This module can only parse MEME version 3.0 and greater. Previous versions
34 have output formats that are more difficult to parse correctly. If the meme
35 output file is not version 3.0 or greater, we signal an error.
36
37 =head1 FEEDBACK
38
39 =head2 Reporting Bugs
40
41 Report bugs to the Bioperl bug tracking system to help us keep track
42 the bugs and their resolution.
43 Bug reports can be submitted via email or the web:
44
45 bioperl-bugs@bio.perl.org
46 http://bugzilla.bioperl.org/
47
48 =head1 AUTHORS - Benjamin Berman
49
50 (based on the Bio::SeqIO modules by Ewan Birney and others)
51 Email: benb@fruitfly.berkeley.edu
52
53
54 =head1 APPENDIX
55
56 The rest of the documentation details each of the object
57 methods. Internal methods are usually preceded with an
58 underscore.
59
60 =cut
61
62 # Let the code begin...
63
64 package Bio::AlignIO::meme;
65 use vars qw(@ISA);
66 use strict;
67
68 use Bio::AlignIO;
69 use Bio::LocatableSeq;
70
71 @ISA = qw(Bio::AlignIO);
72
73 # Constants
74 my $MEME_VERS_ERR = "MEME output file must be generated by version 3.0 or higher";
75 my $MEME_NO_HEADER_ERR = "MEME output file contains no header line (ex: MEME version 3.0)";
76 my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
77
78 =head2 next_aln
79
80 Title : next_aln
81 Usage : $aln = $stream->next_aln()
82 Function: returns the next alignment in the stream
83 Returns : Bio::SimpleAlign object
84 Args : NONE
85
86 =cut
87
88 sub next_aln {
89 my ($self) = @_;
90 my $aln = Bio::SimpleAlign->new(-source => 'meme');
91 my $line;
92 my $good_align_sec = 0;
93 my $in_align_sec = 0;
94 while (!$good_align_sec && defined($line = $self->_readline()))
95 {
96 if (!$in_align_sec)
97 {
98 # Check for the meme header
99 if ($line =~ /^\s*[Mm][Ee][Mm][Ee]\s+version\s+((?:\d+)?\.\d+)/)
100 {
101 $self->{'meme_vers'} = $1;
102 $self->throw($MEME_VERS_ERR) unless ($self->{'meme_vers'} >= 3.0);
103 $self->{'seen_header'} = 1;
104 }
105
106 # Check if they've output the HTML version
107 if ($line =~ /\<[Tt][Ii][Tt][Ll][Ee]\>/)
108 {
109 $self->throw($HTML_VERS_ERR);
110 }
111
112 # Check if we're going into an alignment section
113 if ($line =~ /sites sorted by position/) # meme vers > 3.0
114 {
115 $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
116 $in_align_sec = 1;
117 }
118 }
119 elsif ($line =~ /^\s*(\S+)\s+([+-])\s+(\d+)\s+(\S+)\s+([\.ACTGactg]*) ([ACTGactg]+) ([\.ACTGactg]*)/)
120 {
121 # Got a sequence line
122 my $seq_name = $1;
123 my $strand = ($2 eq '+') ? 1 : -1;
124 my $start_pos = $3;
125 # my $p_val = $4;
126 # my $left_flank = uc($5);
127 my $central = uc($6);
128 # my $right_flank = uc($7);
129
130 # Info about the sequence
131 my $seq_res = $central;
132 my $seq_len = length($seq_res);
133
134 # Info about the flanking sequence
135 # my $left_len = length($left_flank);
136 # my $right_len = length($right_flank);
137 # my $start_len = ($strand > 0) ? $left_len : $right_len;
138 # my $end_len = ($strand > 0) ? $right_len : $left_len;
139
140 # Make the sequence. Meme gives the start coordinate at the left
141 # hand side of the motif relative to the INPUT sequence.
142 my $start_coord = $start_pos;
143 my $end_coord = $start_coord + $seq_len - 1;
144 my $seq = new Bio::LocatableSeq('-seq'=>$seq_res,
145 '-id'=>$seq_name,
146 '-start'=>$start_coord,
147 '-end'=>$end_coord,
148 '-strand'=>$strand);
149
150 # Make a seq_feature out of the motif
151 $aln->add_seq($seq);
152 }
153 elsif (($line =~ /^\-/) || ($line =~ /Sequence name/))
154 {
155 # These are acceptable things to be in the site section
156 }
157 elsif ($line =~ /^\s*$/)
158 {
159 # This ends the site section
160 $in_align_sec = 0;
161 $good_align_sec = 1;
162 }
163 else
164 {
165 $self->warn("Unrecognized format:\n$line");
166 return 0;
167 }
168 }
169
170 # Signal an error if we didn't find a header section
171 $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
172
173 return (($good_align_sec) ? $aln : 0);
174 }
175
176
177
178 =head2 write_aln
179
180 Title : write_aln
181 Usage : $stream->write_aln(@aln)
182 Function: Not implemented
183 Returns : 1 for success and 0 for error
184 Args : Bio::SimpleAlign object
185
186 =cut
187
188 sub write_aln {
189 my ($self,@aln) = @_;
190
191 # Don't handle it yet.
192 $self->throw("AlignIO::meme::write_aln not implemented");
193 return 0;
194 }
195
196
197
198 # ----------------------------------------
199 # - Private methods
200 # ----------------------------------------
201
202
203
204 sub _initialize {
205 my($self,@args) = @_;
206
207 # Call into our base version
208 $self->SUPER::_initialize(@args);
209
210 # Then initialize our data variables
211 $self->{'seen_header'} = 0;
212 }
213
214
215 1;