comparison variant_effect_predictor/Bio/LocatableSeq.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: LocatableSeq.pm,v 1.22.2.1 2003/03/31 11:49:51 heikki Exp $
2 #
3 # BioPerl module for Bio::LocatableSeq
4 #
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
6 #
7 # Copyright Ewan Birney
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::LocatableSeq - A Sequence object with start/end points on it
16 that can be projected into a MSA or have coordinates relative to another seq.
17
18 =head1 SYNOPSIS
19
20
21 use Bio::LocatableSeq;
22 my $seq = new Bio::LocatableSeq(-seq => "CAGT-GGT",
23 -id => "seq1",
24 -start => 1,
25 -end => 7);
26
27
28 =head1 DESCRIPTION
29
30
31 # a normal sequence object
32 $locseq->seq();
33 $locseq->id();
34
35 # has start,end points
36 $locseq->start();
37 $locseq->end();
38
39 # inheriets off RangeI, so range operations possible
40
41 =head1 FEEDBACK
42
43
44 =head2 Mailing Lists
45
46
47 User feedback is an integral part of the evolution of this and other
48 Bioperl modules. Send your comments and suggestions preferably to one
49 of the Bioperl mailing lists. Your participation is much appreciated.
50
51 bioperl-l@bioperl.org - General discussion
52 http://bio.perl.org/MailList.html - About the mailing lists
53
54
55 The locatable sequence object was developed mainly because the
56 SimpleAlign object requires this functionality, and in the rewrite
57 of the Sequence object we had to decide what to do with this.
58
59 It is, to be honest, not well integrated with the rest of bioperl, for
60 example, the trunc() function does not return a LocatableSeq object,
61 as some might have thought. There are all sorts of nasty gotcha's
62 about interactions between coordinate systems when these sort of
63 objects are used.
64
65
66 =head2 Reporting Bugs
67
68 Report bugs to the Bioperl bug tracking system to help us keep track
69 the bugs and their resolution. Bug reports can be submitted via email
70 or the web:
71
72 bioperl-bugs@bio.perl.org
73 http://bugzilla.bioperl.org/
74
75
76 =head1 APPENDIX
77
78 The rest of the documentation details each of the object
79 methods. Internal methods are usually preceded with a _
80
81 =cut
82
83 #'
84 # Let the code begin...
85
86 package Bio::LocatableSeq;
87 use vars qw(@ISA);
88 use strict;
89
90 use Bio::PrimarySeq;
91 use Bio::RangeI;
92 use Bio::Location::Simple;
93 use Bio::Location::Fuzzy;
94
95
96 @ISA = qw(Bio::PrimarySeq Bio::RangeI);
97
98 sub new {
99 my ($class, @args) = @_;
100 my $self = $class->SUPER::new(@args);
101
102 my ($start,$end,$strand) =
103 $self->_rearrange( [qw(START END STRAND)],
104 @args);
105
106 defined $start && $self->start($start);
107 defined $end && $self->end($end);
108 defined $strand && $self->strand($strand);
109
110 return $self; # success - we hope!
111 }
112
113 =head2 start
114
115 Title : start
116 Usage : $obj->start($newval)
117 Function:
118 Returns : value of start
119 Args : newvalue (optional)
120
121 =cut
122
123 sub start{
124 my $self = shift;
125 if( @_ ) {
126 my $value = shift;
127 $self->{'start'} = $value;
128 }
129 return $self->{'start'};
130
131 }
132
133 =head2 end
134
135 Title : end
136 Usage : $obj->end($newval)
137 Function:
138 Returns : value of end
139 Args : newvalue (optional)
140
141 =cut
142
143 sub end {
144 my $self = shift;
145 if( @_ ) {
146 my $value = shift;
147 my $string = $self->seq;
148 if ($string and $self->start) {
149 my $s2 = $string;
150 $string =~ s/[.-]+//g;
151 my $len = CORE::length $string;
152 my $new_end = $self->start + $len - 1 ;
153 my $id = $self->id;
154 $self->warn("In sequence $id residue count gives value $len.
155 Overriding value [$value] with value $new_end for Bio::LocatableSeq::end().")
156 and $value = $new_end if $new_end != $value and $self->verbose > 0;
157 }
158
159 $self->{'end'} = $value;
160 }
161 return $self->{'end'};
162
163 }
164
165 =head2 strand
166
167 Title : strand
168 Usage : $obj->strand($newval)
169 Function:
170 Returns : value of strand
171 Args : newvalue (optional)
172
173 =cut
174
175 sub strand{
176 my $self = shift;
177 if( @_ ) {
178 my $value = shift;
179 $self->{'strand'} = $value;
180 }
181 return $self->{'strand'};
182 }
183
184 =head2 get_nse
185
186 Title : get_nse
187 Usage :
188 Function: read-only name of form id/start-end
189 Example :
190 Returns :
191 Args :
192
193 =cut
194
195 sub get_nse{
196 my ($self,$char1,$char2) = @_;
197
198 $char1 ||= "/";
199 $char2 ||= "-";
200
201 $self->throw("Attribute id not set") unless $self->id();
202 $self->throw("Attribute start not set") unless $self->start();
203 $self->throw("Attribute end not set") unless $self->end();
204
205 return $self->id() . $char1 . $self->start . $char2 . $self->end ;
206
207 }
208
209
210 =head2 no_gap
211
212 Title : no_gaps
213 Usage :$self->no_gaps('.')
214 Function:
215
216 Gets number of gaps in the sequence. The count excludes
217 leading or trailing gap characters.
218
219 Valid bioperl sequence characters are [A-Za-z\-\.\*]. Of
220 these, '.' and '-' are counted as gap characters unless an
221 optional argument specifies one of them.
222
223 Returns : number of internal gaps in the sequnce.
224 Args : a gap character (optional)
225
226 =cut
227
228 sub no_gaps {
229 my ($self,$char) = @_;
230 my ($seq, $count) = (undef, 0);
231
232 # default gap characters
233 $char ||= '-.';
234
235 $self->warn("I hope you know what you are doing setting gap to [$char]")
236 unless $char =~ /[-.]/;
237
238 $seq = $self->seq;
239 return 0 unless $seq; # empty sequence does not have gaps
240
241 $seq =~ s/^([$char]+)//;
242 $seq =~ s/([$char]+)$//;
243 $count++ while $seq =~ /[$char]+/g;
244
245 return $count;
246
247 }
248
249
250 =head2 column_from_residue_number
251
252 Title : column_from_residue_number
253 Usage : $col = $seq->column_from_residue_number($resnumber)
254 Function:
255
256 This function gives the position in the alignment
257 (i.e. column number) of the given residue number in the
258 sequence. For example, for the sequence
259
260 Seq1/91-97 AC..DEF.GH
261
262 column_from_residue_number(94) returns 5.
263
264 An exception is thrown if the residue number would lie
265 outside the length of the aligment
266 (e.g. column_from_residue_number( "Seq2", 22 )
267
268 Returns : A column number for the position of the
269 given residue in the given sequence (1 = first column)
270 Args : A residue number in the whole sequence (not just that
271 segment of it in the alignment)
272
273 =cut
274
275 sub column_from_residue_number {
276 my ($self, $resnumber) = @_;
277
278 $self->throw("Residue number has to be a positive integer, not [$resnumber]")
279 unless $resnumber =~ /^\d+$/ and $resnumber > 0;
280
281 if ($resnumber >= $self->start() and $resnumber <= $self->end()) {
282 my @residues = split //, $self->seq;
283 my $count = $self->start();
284 my $i;
285 for ($i=0; $i < @residues; $i++) {
286 if ($residues[$i] ne '.' and $residues[$i] ne '-') {
287 $count == $resnumber and last;
288 $count++;
289 }
290 }
291 # $i now holds the index of the column.
292 # The actual column number is this index + 1
293
294 return $i+1;
295 }
296
297 $self->throw("Could not find residue number $resnumber");
298
299 }
300
301
302 =head2 location_from_column
303
304 Title : location_from_column
305 Usage : $loc = $ali->location_from_column( $seq_number, $column_number)
306 Function:
307
308 This function gives the residue number in the sequence with
309 the given name for a given position in the alignment
310 (i.e. column number) of the given. Gaps complicate this
311 process and force the output to be a L<Bio::Range> where
312 values can be undefined. For example, for the alignment
313
314 Seq1/91-97 AC..DEF.G.
315 Seq2/1-9 .CYHDEFKGK
316
317 location_from_column( Seq1/91-97, 3 ) position 93
318 location_from_column( Seq1/91-97, 2 ) position 92^93
319 location_from_column( Seq1/91-97, 10) position 97^98
320 location_from_column( Seq2/1-9, 1 ) position undef
321
322 An exact position returns a Bio::Location::Simple object
323 where where location_type() returns 'EXACT', if a position
324 is between bases location_type() returns 'IN-BETWEEN'.
325 Column before the first residue returns undef. Note that if
326 the position is after the last residue in the alignment,
327 that there is no guarantee that the original sequence has
328 residues after that position.
329
330 An exception is thrown if the column number is not within
331 the sequence.
332
333 Returns : Bio::Location::Simple or undef
334 Args : A column number
335 Throws : If column is not within the sequence
336
337 See L<Bio::Location::Simple> for more.
338
339 =cut
340
341 sub location_from_column {
342 my ($self, $column) = @_;
343
344 $self->throw("Column number has to be a positive integer, not [$column]")
345 unless $column =~ /^\d+$/ and $column > 0;
346 $self->throw("Column number [column] is larger than".
347 " sequence length [". $self->length. "]")
348 unless $column <= $self->length;
349
350 my ($loc);
351 my $s = $self->subseq(1,$column);
352 $s =~ s/\W//g;
353 my $pos = CORE::length $s;
354
355 my $start = $self->start || 0 ;
356 if ($self->subseq($column, $column) =~ /[a-zA-Z]/ ) {
357 $loc = new Bio::Location::Simple
358 (-start => $pos + $start - 1,
359 -end => $pos + $start - 1,
360 -strand => 1
361 );
362 }
363 elsif ($pos == 0 and $self->start == 1) {
364 } else {
365 $loc = new Bio::Location::Simple
366 (-start => $pos + $start - 1,
367 -end => $pos +1 + $start - 1,
368 -strand => 1,
369 -location_type => 'IN-BETWEEN'
370 );
371 }
372 return $loc;
373 }
374
375 =head2 revcom
376
377 Title : revcom
378 Usage : $rev = $seq->revcom()
379 Function: Produces a new Bio::LocatableSeq object which
380 has the reversed complement of the sequence. For protein
381 sequences this throws an exception of "Sequence is a
382 protein. Cannot revcom"
383
384 Returns : A new Bio::LocatableSeq object
385 Args : none
386
387 =cut
388
389 sub revcom {
390 my ($self) = @_;
391
392 my $new = $self->SUPER::revcom;
393 $new->strand($self->strand * -1);
394 $new->start($self->start) if $self->start;
395 $new->end($self->end) if $self->end;
396 return $new;
397 }
398
399
400 =head2 trunc
401
402 Title : trunc
403 Usage : $subseq = $myseq->trunc(10,100);
404 Function: Provides a truncation of a sequence,
405
406 Example :
407 Returns : a fresh Bio::PrimarySeqI implementing object
408 Args : Two integers denoting first and last columns of the
409 sequence to be included into sub-sequence.
410
411
412 =cut
413
414 sub trunc {
415
416 my ($self, $start, $end) = @_;
417 my $new = $self->SUPER::trunc($start, $end);
418
419 $start = $self->location_from_column($start);
420 $start ? ($start = $start->start) : ($start = 1);
421
422 $end = $self->location_from_column($end);
423 $end = $end->start if $end;
424
425 $new->strand($self->strand);
426 $new->start($start) if $start;
427 $new->end($end) if $end;
428
429 return $new;
430 }
431
432 1;