comparison variant_effect_predictor/Bio/AlignIO/emboss.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: emboss.pm,v 1.11 2002/10/22 07:45:10 lapp Exp $
2 #
3 # BioPerl module for Bio::AlignIO::emboss
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::AlignIO::emboss - Parse EMBOSS alignment output (from applications water and needle)
16
17 =head1 SYNOPSIS
18
19 # do not use the object directly
20 use Bio::AlignIO;
21 # read in an alignment from the EMBOSS program water
22 my $in = new Bio::AlignIO(-format => 'emboss',
23 -file => 'seq.water');
24 while( my $aln = $in->next_aln ) {
25 # do something with the alignment
26 }
27
28 =head1 DESCRIPTION
29
30 This object handles parsing and writing pairwise sequence alignments
31 from the EMBOSS suite.
32
33 =head1 FEEDBACK
34
35 =head2 Mailing Lists
36
37 User feedback is an integral part of the evolution of this and other
38 Bioperl modules. Send your comments and suggestions preferably to
39 the Bioperl mailing list. Your participation is much appreciated.
40
41 bioperl-l@bioperl.org - General discussion
42 http://bioperl.org/MailList.shtml - About the mailing lists
43
44 =head2 Reporting Bugs
45
46 Report bugs to the Bioperl bug tracking system to help us keep track
47 of the bugs and their resolution. Bug reports can be submitted via
48 email or the web:
49
50 bioperl-bugs@bioperl.org
51 http://bugzilla.bioperl.org/
52
53 =head1 AUTHOR - Jason Stajich
54
55 Email jason@bioperl.org
56
57 Describe contact details here
58
59 =head1 CONTRIBUTORS
60
61 Additional contributors names and emails here
62
63 =head1 APPENDIX
64
65 The rest of the documentation details each of the object methods.
66 Internal methods are usually preceded with a _
67
68 =cut
69
70
71 # Let the code begin...
72
73
74 package Bio::AlignIO::emboss;
75 use vars qw(@ISA $EMBOSSTitleLen $EMBOSSLineLen);
76 use strict;
77
78 use Bio::AlignIO;
79 use Bio::LocatableSeq;
80
81 @ISA = qw(Bio::AlignIO );
82
83 BEGIN {
84 $EMBOSSTitleLen = 13;
85 $EMBOSSLineLen = 50;
86 }
87
88 sub _initialize {
89 my($self,@args) = @_;
90 $self->SUPER::_initialize(@args);
91 $self->{'_type'} = undef;
92 }
93
94 =head2 next_aln
95
96 Title : next_aln
97 Usage : $aln = $stream->next_aln()
98 Function: returns the next alignment in the stream.
99 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
100 or on error
101 Args : NONE
102
103 =cut
104
105 sub next_aln {
106 my ($self) = @_;
107 my $seenbegin = 0;
108 my %data = ( 'seq1' => {
109 'start'=> undef,
110 'end'=> undef,
111 'name' => '',
112 'data' => '' },
113 'seq2' => {
114 'start'=> undef,
115 'end'=> undef,
116 'name' => '',
117 'data' => '' },
118 'align' => '',
119 'type' => $self->{'_type'}, # to restore type from
120 # previous aln if possible
121 );
122 my %names;
123 while( defined($_ = $self->_readline) ) {
124 next if( /^\#?\s+$/ || /^\#+\s*$/ );
125 if( /^\#(\=|\-)+\s*$/) {
126 last if( $seenbegin);
127 } elsif( /(Local|Global):\s*(\S+)\s+vs\s+(\S+)/ ||
128 /^\#\s+Program:\s+(\S+)/ )
129 {
130 my ($name1,$name2) = ($2,$3);
131 if( ! defined $name1 ) { # Handle EMBOSS 2.2.X
132 $data{'type'} = $1;
133 $name1 = $name2 = '';
134 } else {
135 $data{'type'} = $1 eq 'Local' ? 'water' : 'needle';
136 }
137 $data{'seq1'}->{'name'} = $name1;
138 $data{'seq2'}->{'name'} = $name2;
139
140 $self->{'_type'} = $data{'type'};
141
142 } elsif( /Score:\s+(\S+)/ ) {
143 $data{'score'} = $1;
144 } elsif( /^\#\s+(1|2):\s+(\S+)/ && ! $data{"seq$1"}->{'name'} ) {
145 my $nm = $2;
146 $nm = substr($nm,0,$EMBOSSTitleLen); # emboss has a max seq length
147 if( $names{$nm} ) {
148 $nm .= "-". $names{$nm};
149 }
150 $names{$nm}++;
151 $data{"seq$1"}->{'name'} = $nm;
152 } elsif( $data{'seq1'}->{'name'} &&
153 /^$data{'seq1'}->{'name'}/ ) {
154 my $count = 0;
155 $seenbegin = 1;
156 my @current;
157 while( defined ($_) ) {
158 my $align_other = '';
159 my $delayed;
160 if($count == 0 || $count == 2 ) {
161 my @l = split;
162 my ($seq,$align,$start,$end);
163 if( $count == 2 && $data{'seq2'}->{'name'} eq '' ) {
164 # weird boundary condition
165 ($start,$align,$end) = @l;
166 } elsif( @l == 3 ) {
167 $align = '';
168 ($seq,$start,$end) = @l
169 } else {
170 ($seq,$start,$align,$end) = @l;
171 }
172
173 my $seqname = sprintf("seq%d", ($count == 0) ? '1' : '2');
174 $data{$seqname}->{'data'} .= $align;
175 $data{$seqname}->{'start'} ||= $start;
176 $data{$seqname}->{'end'} = $end;
177 $current[$count] = [ $start,$align || ''];
178 } else {
179 s/^\s+//;
180 s/\s+$//;
181 $data{'align'} .= $_;
182 }
183
184 BOTTOM:
185 last if( $count++ == 2);
186 $_ = $self->_readline();
187 }
188
189 if( $data{'type'} eq 'needle' ) {
190 # which ever one is shorter we want to bring it up to
191 # length. Man this stinks.
192 my ($s1,$s2) = ($data{'seq1'}, $data{'seq2'});
193
194 my $d = length($current[0]->[1]) - length($current[2]->[1]);
195 if( $d < 0 ) { # s1 is smaller, need to add some
196 # compare the starting points for this alignment line
197 if( $current[0]->[0] <= 1 && $current[2]->[0] > 1) {
198 $s1->{'data'} = ('-' x abs($d)) . $s1->{'data'};
199 $data{'align'} = (' 'x abs($d)).$data{'align'};
200 } else {
201 $s1->{'data'} .= '-' x abs($d);
202 $data{'align'} .= ' 'x abs($d);
203 }
204 } elsif( $d > 0) { # s2 is smaller, need to add some
205 if( $current[2]->[0] <= 1 && $current[0]->[0] > 1) {
206 $s2->{'data'} = ('-' x abs($d)) . $s2->{'data'};
207 $data{'align'} = (' 'x abs($d)).$data{'align'};
208 } else {
209 $s2->{'data'} .= '-' x abs($d);
210 $data{'align'} .= ' 'x abs($d);
211 }
212 }
213 }
214
215 }
216 }
217 return undef unless $seenbegin;
218 my $aln = Bio::SimpleAlign->new(-verbose => $self->verbose(),
219 -source => "EMBOSS-".$data{'type'});
220
221 foreach my $seqname ( qw(seq1 seq2) ) {
222 return undef unless ( defined $data{$seqname} );
223 $data{$seqname}->{'name'} ||= $seqname;
224 my $seq = new Bio::LocatableSeq('-seq' => $data{$seqname}->{'data'},
225 '-id' => $data{$seqname}->{'name'},
226 '-start'=> $data{$seqname}->{'start'},
227 '-end' => $data{$seqname}->{'end'},
228 );
229 $aln->add_seq($seq);
230 }
231 return $aln;
232 }
233
234 =head2 write_aln
235
236 Title : write_aln
237 Usage : $stream->write_aln(@aln)
238 Function: writes the $aln object into the stream in emboss format
239 Returns : 1 for success and 0 for error
240 Args : L<Bio::Align::AlignI> object
241
242
243 =cut
244
245 sub write_aln {
246 my ($self,@aln) = @_;
247
248 $self->throw("Sorry: writing emboss output is not currently available! \n");
249 }
250
251 1;