0
|
1 # $id $
|
|
2 #
|
|
3 # BioPerl module for Bio::AlignIO::meme
|
|
4 # based on the Bio::SeqIO modules
|
|
5 # by Ewan Birney <birney@sanger.ac.uk>
|
|
6 # and Lincoln Stein <lstein@cshl.org>
|
|
7 #
|
|
8 # and the SimpleAlign.pm module of Ewan Birney
|
|
9 #
|
|
10 # Copyright Benjamin Berman
|
|
11 #
|
|
12 # You may distribute this module under the same terms as perl itself
|
|
13 # _history
|
|
14
|
|
15 =head1 NAME
|
|
16
|
|
17 Bio::AlignIO::meme - meme sequence input/output stream
|
|
18
|
|
19 =head1 SYNOPSIS
|
|
20
|
|
21 Do not use this module directly. Use it via the Bio::AlignIO class.
|
|
22
|
|
23 =head1 DESCRIPTION
|
|
24
|
|
25 This object transforms the "sites sorted by p-value" sections of a meme
|
|
26 (text) output file into a series of Bio::SimpleAlign objects. Each
|
|
27 SimpleAlign object contains Bio::LocatableSeq objects which represent the
|
|
28 individual aligned sites as defined by the central portion of the "site"
|
|
29 field in the meme file. The start and end coordinates are derived from
|
|
30 the "Start" field. See L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for
|
|
31 more information.
|
|
32
|
|
33 This module can only parse MEME version 3.0 and greater. Previous versions
|
|
34 have output formats that are more difficult to parse correctly. If the meme
|
|
35 output file is not version 3.0 or greater, we signal an error.
|
|
36
|
|
37 =head1 FEEDBACK
|
|
38
|
|
39 =head2 Reporting Bugs
|
|
40
|
|
41 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
42 the bugs and their resolution.
|
|
43 Bug reports can be submitted via email or the web:
|
|
44
|
|
45 bioperl-bugs@bio.perl.org
|
|
46 http://bugzilla.bioperl.org/
|
|
47
|
|
48 =head1 AUTHORS - Benjamin Berman
|
|
49
|
|
50 (based on the Bio::SeqIO modules by Ewan Birney and others)
|
|
51 Email: benb@fruitfly.berkeley.edu
|
|
52
|
|
53
|
|
54 =head1 APPENDIX
|
|
55
|
|
56 The rest of the documentation details each of the object
|
|
57 methods. Internal methods are usually preceded with an
|
|
58 underscore.
|
|
59
|
|
60 =cut
|
|
61
|
|
62 # Let the code begin...
|
|
63
|
|
64 package Bio::AlignIO::meme;
|
|
65 use vars qw(@ISA);
|
|
66 use strict;
|
|
67
|
|
68 use Bio::AlignIO;
|
|
69 use Bio::LocatableSeq;
|
|
70
|
|
71 @ISA = qw(Bio::AlignIO);
|
|
72
|
|
73 # Constants
|
|
74 my $MEME_VERS_ERR = "MEME output file must be generated by version 3.0 or higher";
|
|
75 my $MEME_NO_HEADER_ERR = "MEME output file contains no header line (ex: MEME version 3.0)";
|
|
76 my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
|
|
77
|
|
78 =head2 next_aln
|
|
79
|
|
80 Title : next_aln
|
|
81 Usage : $aln = $stream->next_aln()
|
|
82 Function: returns the next alignment in the stream
|
|
83 Returns : Bio::SimpleAlign object
|
|
84 Args : NONE
|
|
85
|
|
86 =cut
|
|
87
|
|
88 sub next_aln {
|
|
89 my ($self) = @_;
|
|
90 my $aln = Bio::SimpleAlign->new(-source => 'meme');
|
|
91 my $line;
|
|
92 my $good_align_sec = 0;
|
|
93 my $in_align_sec = 0;
|
|
94 while (!$good_align_sec && defined($line = $self->_readline()))
|
|
95 {
|
|
96 if (!$in_align_sec)
|
|
97 {
|
|
98 # Check for the meme header
|
|
99 if ($line =~ /^\s*[Mm][Ee][Mm][Ee]\s+version\s+((?:\d+)?\.\d+)/)
|
|
100 {
|
|
101 $self->{'meme_vers'} = $1;
|
|
102 $self->throw($MEME_VERS_ERR) unless ($self->{'meme_vers'} >= 3.0);
|
|
103 $self->{'seen_header'} = 1;
|
|
104 }
|
|
105
|
|
106 # Check if they've output the HTML version
|
|
107 if ($line =~ /\<[Tt][Ii][Tt][Ll][Ee]\>/)
|
|
108 {
|
|
109 $self->throw($HTML_VERS_ERR);
|
|
110 }
|
|
111
|
|
112 # Check if we're going into an alignment section
|
|
113 if ($line =~ /sites sorted by position/) # meme vers > 3.0
|
|
114 {
|
|
115 $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
|
|
116 $in_align_sec = 1;
|
|
117 }
|
|
118 }
|
|
119 elsif ($line =~ /^\s*(\S+)\s+([+-])\s+(\d+)\s+(\S+)\s+([\.ACTGactg]*) ([ACTGactg]+) ([\.ACTGactg]*)/)
|
|
120 {
|
|
121 # Got a sequence line
|
|
122 my $seq_name = $1;
|
|
123 my $strand = ($2 eq '+') ? 1 : -1;
|
|
124 my $start_pos = $3;
|
|
125 # my $p_val = $4;
|
|
126 # my $left_flank = uc($5);
|
|
127 my $central = uc($6);
|
|
128 # my $right_flank = uc($7);
|
|
129
|
|
130 # Info about the sequence
|
|
131 my $seq_res = $central;
|
|
132 my $seq_len = length($seq_res);
|
|
133
|
|
134 # Info about the flanking sequence
|
|
135 # my $left_len = length($left_flank);
|
|
136 # my $right_len = length($right_flank);
|
|
137 # my $start_len = ($strand > 0) ? $left_len : $right_len;
|
|
138 # my $end_len = ($strand > 0) ? $right_len : $left_len;
|
|
139
|
|
140 # Make the sequence. Meme gives the start coordinate at the left
|
|
141 # hand side of the motif relative to the INPUT sequence.
|
|
142 my $start_coord = $start_pos;
|
|
143 my $end_coord = $start_coord + $seq_len - 1;
|
|
144 my $seq = new Bio::LocatableSeq('-seq'=>$seq_res,
|
|
145 '-id'=>$seq_name,
|
|
146 '-start'=>$start_coord,
|
|
147 '-end'=>$end_coord,
|
|
148 '-strand'=>$strand);
|
|
149
|
|
150 # Make a seq_feature out of the motif
|
|
151 $aln->add_seq($seq);
|
|
152 }
|
|
153 elsif (($line =~ /^\-/) || ($line =~ /Sequence name/))
|
|
154 {
|
|
155 # These are acceptable things to be in the site section
|
|
156 }
|
|
157 elsif ($line =~ /^\s*$/)
|
|
158 {
|
|
159 # This ends the site section
|
|
160 $in_align_sec = 0;
|
|
161 $good_align_sec = 1;
|
|
162 }
|
|
163 else
|
|
164 {
|
|
165 $self->warn("Unrecognized format:\n$line");
|
|
166 return 0;
|
|
167 }
|
|
168 }
|
|
169
|
|
170 # Signal an error if we didn't find a header section
|
|
171 $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
|
|
172
|
|
173 return (($good_align_sec) ? $aln : 0);
|
|
174 }
|
|
175
|
|
176
|
|
177
|
|
178 =head2 write_aln
|
|
179
|
|
180 Title : write_aln
|
|
181 Usage : $stream->write_aln(@aln)
|
|
182 Function: Not implemented
|
|
183 Returns : 1 for success and 0 for error
|
|
184 Args : Bio::SimpleAlign object
|
|
185
|
|
186 =cut
|
|
187
|
|
188 sub write_aln {
|
|
189 my ($self,@aln) = @_;
|
|
190
|
|
191 # Don't handle it yet.
|
|
192 $self->throw("AlignIO::meme::write_aln not implemented");
|
|
193 return 0;
|
|
194 }
|
|
195
|
|
196
|
|
197
|
|
198 # ----------------------------------------
|
|
199 # - Private methods
|
|
200 # ----------------------------------------
|
|
201
|
|
202
|
|
203
|
|
204 sub _initialize {
|
|
205 my($self,@args) = @_;
|
|
206
|
|
207 # Call into our base version
|
|
208 $self->SUPER::_initialize(@args);
|
|
209
|
|
210 # Then initialize our data variables
|
|
211 $self->{'seen_header'} = 0;
|
|
212 }
|
|
213
|
|
214
|
|
215 1;
|