annotate variant_effect_predictor/Bio/AlignIO/meme.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $id $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::AlignIO::meme
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 # based on the Bio::SeqIO modules
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # by Ewan Birney <birney@sanger.ac.uk>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 # and Lincoln Stein <lstein@cshl.org>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 # and the SimpleAlign.pm module of Ewan Birney
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 # Copyright Benjamin Berman
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 # _history
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 Bio::AlignIO::meme - meme sequence input/output stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 Do not use this module directly. Use it via the Bio::AlignIO class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 This object transforms the "sites sorted by p-value" sections of a meme
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 (text) output file into a series of Bio::SimpleAlign objects. Each
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 SimpleAlign object contains Bio::LocatableSeq objects which represent the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 individual aligned sites as defined by the central portion of the "site"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 field in the meme file. The start and end coordinates are derived from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 the "Start" field. See L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 more information.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 This module can only parse MEME version 3.0 and greater. Previous versions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 have output formats that are more difficult to parse correctly. If the meme
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 output file is not version 3.0 or greater, we signal an error.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 the bugs and their resolution.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 Bug reports can be submitted via email or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 =head1 AUTHORS - Benjamin Berman
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 (based on the Bio::SeqIO modules by Ewan Birney and others)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 Email: benb@fruitfly.berkeley.edu
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 methods. Internal methods are usually preceded with an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 underscore.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 package Bio::AlignIO::meme;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 use Bio::AlignIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 use Bio::LocatableSeq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 @ISA = qw(Bio::AlignIO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 # Constants
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 my $MEME_VERS_ERR = "MEME output file must be generated by version 3.0 or higher";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 my $MEME_NO_HEADER_ERR = "MEME output file contains no header line (ex: MEME version 3.0)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 =head2 next_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 Title : next_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Usage : $aln = $stream->next_aln()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 Function: returns the next alignment in the stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Returns : Bio::SimpleAlign object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 Args : NONE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 sub next_aln {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 my $aln = Bio::SimpleAlign->new(-source => 'meme');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 my $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 my $good_align_sec = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 my $in_align_sec = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 while (!$good_align_sec && defined($line = $self->_readline()))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 if (!$in_align_sec)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 # Check for the meme header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 if ($line =~ /^\s*[Mm][Ee][Mm][Ee]\s+version\s+((?:\d+)?\.\d+)/)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 $self->{'meme_vers'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 $self->throw($MEME_VERS_ERR) unless ($self->{'meme_vers'} >= 3.0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 $self->{'seen_header'} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 # Check if they've output the HTML version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 if ($line =~ /\<[Tt][Ii][Tt][Ll][Ee]\>/)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 $self->throw($HTML_VERS_ERR);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 # Check if we're going into an alignment section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 if ($line =~ /sites sorted by position/) # meme vers > 3.0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 $in_align_sec = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 elsif ($line =~ /^\s*(\S+)\s+([+-])\s+(\d+)\s+(\S+)\s+([\.ACTGactg]*) ([ACTGactg]+) ([\.ACTGactg]*)/)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 # Got a sequence line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 my $seq_name = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 my $strand = ($2 eq '+') ? 1 : -1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 my $start_pos = $3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 # my $p_val = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 # my $left_flank = uc($5);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 my $central = uc($6);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 # my $right_flank = uc($7);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 # Info about the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 my $seq_res = $central;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 my $seq_len = length($seq_res);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 # Info about the flanking sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 # my $left_len = length($left_flank);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 # my $right_len = length($right_flank);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 # my $start_len = ($strand > 0) ? $left_len : $right_len;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 # my $end_len = ($strand > 0) ? $right_len : $left_len;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 # Make the sequence. Meme gives the start coordinate at the left
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 # hand side of the motif relative to the INPUT sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 my $start_coord = $start_pos;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 my $end_coord = $start_coord + $seq_len - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 my $seq = new Bio::LocatableSeq('-seq'=>$seq_res,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 '-id'=>$seq_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 '-start'=>$start_coord,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 '-end'=>$end_coord,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 '-strand'=>$strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 # Make a seq_feature out of the motif
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 $aln->add_seq($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 elsif (($line =~ /^\-/) || ($line =~ /Sequence name/))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 # These are acceptable things to be in the site section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 elsif ($line =~ /^\s*$/)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 # This ends the site section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 $in_align_sec = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 $good_align_sec = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 else
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 $self->warn("Unrecognized format:\n$line");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 # Signal an error if we didn't find a header section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 $self->throw($MEME_NO_HEADER_ERR) unless ($self->{'seen_header'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 return (($good_align_sec) ? $aln : 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 =head2 write_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 Title : write_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 Usage : $stream->write_aln(@aln)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 Function: Not implemented
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 Returns : 1 for success and 0 for error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 Args : Bio::SimpleAlign object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 sub write_aln {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 my ($self,@aln) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 # Don't handle it yet.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 $self->throw("AlignIO::meme::write_aln not implemented");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 # ----------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 # - Private methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 # ----------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 sub _initialize {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 my($self,@args) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 # Call into our base version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 $self->SUPER::_initialize(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 # Then initialize our data variables
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 $self->{'seen_header'} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 1;