comparison variant_effect_predictor/Bio/LiveSeq/AARange.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: AARange.pm,v 1.10 2001/10/22 08:22:49 heikki Exp $
2 #
3 # bioperl module for Bio::LiveSeq::AARange
4 #
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
6 #
7 # Copyright Joseph Insana
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::LiveSeq::AARange - AARange abstract class for LiveSeq
16
17 =head1 SYNOPSIS
18
19 #documentation needed
20
21 =head1 DESCRIPTION
22
23 This is used as possible parent for aminoacid range object classes.
24 Or it can be used straight away to define aminoacid ranges. The idea
25 is that the ranges defined are attached to a Translation object and
26 they refer to its coordinate-system when they are first created (via
27 the new() method). When they are created they are anyway linked to
28 the underlying DNA LiveSeq by way of the LiveSeq labels. This allows
29 to preserve the ranges even if the numbering changes in the
30 Translation due to deletions or insertions.
31
32 The protein sequence associated with the AARange can be accessed via
33 the usual seq() or subseq() methods.
34
35 The start and end of the AARange in protein coordinate system can be
36 fetched with aa_start() and aa_end() methods. Note: the behaviour of
37 these methods would be influenced by the coordinate_start set in the
38 corresponding Translation object. This can be desirable but can also
39 lead to confusion if the coordinate_start had been changed and the
40 original position of the AARange was to be retrieved.
41
42 start() and end() methods of the AARange will point to the labels
43 identifying the first nucleotide of the first and last triplet coding
44 for the start and end of the AminoAcidRange.
45
46 The underlying nucleotide sequence of the AARange can be retrieved
47 with the labelsubseq() method. This would retrieve the whole DNA
48 sequence, including possible introns. This is called "DNA_sequence".
49
50 To fetch the nucleotide sequence of the Transcript, without introns,
51 the labelsubseq() of the attached Transcript (the Transcript the
52 Translation comes from) has to be accessed. This is called
53 "cDNA_sequence".
54
55 Here are the operations to retrieve these latter two kinds of
56 sequences:
57
58 $startlabel=$AARange->start;
59 $endtripletlabel=$AARange->end;
60 $endlabel=$AARange->{'seq'}->label(3,$endtripletlabel,$AARange->strand);
61
62 $dnaseq=$AARange->labelsubseq($startlabel,undef,$endlabel));
63
64 $cdnaseq=$AARange->get_Transcript->labelsubseq($startlabel,undef,$endlabel);
65
66 To simplify, these operations have been included in two additional
67 methods: dna_seq() and cdna_seq().
68
69 These would return the whole sequence, as in the examples above. But
70 the above general scheme can be used by specifying different labels,
71 to retrieve hypothetical subsequences of interest.
72
73 =head1 AUTHOR - Joseph A.L. Insana
74
75 Email: Insana@ebi.ac.uk, jinsana@gmx.net
76
77 Address:
78
79 EMBL Outstation, European Bioinformatics Institute
80 Wellcome Trust Genome Campus, Hinxton
81 Cambs. CB10 1SD, United Kingdom
82
83 =head1 APPENDIX
84
85 The rest of the documentation details each of the object
86 methods. Internal methods are usually preceded with a _
87
88 =cut
89
90 # Let the code begin...
91
92 package Bio::LiveSeq::AARange;
93 $VERSION=1.8;
94
95 # Version history:
96 # Wed Apr 19 15:10:29 BST 2000 v 1.0 begun
97 # Wed Apr 19 17:26:58 BST 2000 v 1.4 new, aa_start, aa_end, seq, length created
98 # Wed Apr 19 19:57:42 BST 2000 v 1.5 subseq position label added
99 # Thu Apr 20 16:13:58 BST 2000 v 1.7 added: documentation, dna_seq, cdna_seq
100 # Wed Mar 28 16:58:02 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI)
101
102 use strict;
103 use vars qw($VERSION @ISA);
104 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it
105 @ISA=qw(Bio::LiveSeq::SeqI);
106
107 =head2 new
108
109 Title : new
110 Usage : $aarange = Bio::LiveSeq::AARange->new(-translation => $obj_ref,
111 -start => $beginaa,
112 -end => $endaa,
113 -name => "ABCD",
114 -description => "DCBA",
115 -translength => $length);
116
117 Function: generates a new AminoAcidRange LiveSeq object
118 Returns : reference to a new object of class AARange
119 Errorcode -1
120 Args : two positions in AminoAcid coordinate numbering
121 an object reference specifying to which translation the aminoacid
122 ranges refer to
123 a name and a description (optional)
124 an optional "translength" argument: this can be given when
125 a lot of AARanges are to be created at the same time for the same
126 Translation object, calculating it with $translation->length
127 This would increase the speed, avoiding the new() function to
128 calculate everytime the same length again and again for every obj.
129
130 =cut
131
132 sub new {
133 my ($thing, %args) = @_;
134 my $class = ref($thing) || $thing;
135 my ($obj,%range);
136
137 $obj = \%range;
138 $obj = bless $obj, $class;
139 my $self=$obj;
140
141 my ($translation,$start,$end,$name,$description,$translength)=($args{-translation},$args{-start},$args{-end},$args{-name},$args{-description},$args{-translength});
142
143 unless (($translation)&&(ref($translation) eq "Bio::LiveSeq::Translation")) {
144 $self->warn("No -translation or wrong type given");
145 return (-1);
146 }
147 unless ($translength) { # if it's not given, fetch it
148 $translength=$translation->length;
149 }
150 my $seq=$translation->{'seq'};
151
152 if (($start < 1)&&($start > $translength)) {
153 $self->warn("$class not initialised because start aminoacid position not valid");
154 return (-1);
155 }
156 if (($end < 1)&&($end > $translength)) {
157 $self->warn("$class not initialised because end aminoacid position not valid");
158 return (-1);
159 }
160 if ($start > $end) {
161 $self->warn("$class not initialised because start position > end position!");
162 return (-1);
163 }
164
165 my ($starttripletlabel,$endtripletlabel);
166 if ($start == $end) { # trick to increase speed
167 $starttripletlabel=$endtripletlabel=$translation->label($start);
168 } else {
169 ($starttripletlabel,$endtripletlabel)=($translation->label($start),$translation->label($end));
170 }
171 unless (($starttripletlabel > 0)&&($endtripletlabel > 0)) {
172 $self->warn("$class not initialised because of problems in retrieving start or end label!");
173 return (-1);
174 }
175
176 # unsure if needed:
177 #my $endlabel=$seq->label(3,$endtripletlabel); # to get the real end
178 #unless ($endlabel > 0) {
179 #carp "$class not initialised because of problems retrieving the last nucleotide of the triplet coding for the end aminoacid";
180 #return (-1);
181 #}
182 $self->{'seq'}=$seq;
183 $self->{'start'}=$starttripletlabel;
184 $self->{'end'}=$endtripletlabel;
185 $self->{'strand'}=$translation->strand;
186 $self->{'translation'}=$translation;
187 $self->{'name'}=$name;
188 $self->{'description'}=$description;
189 $self->{'alphabet'}="protein";
190
191 return $obj;
192 }
193
194 sub coordinate_start {
195 my $self=shift;
196 $self->warn("Cannot perform this operation in an AminoAcidRange object!");
197 return (-1);
198 }
199
200 sub all_labels {
201 my $self=shift;
202 $self->warn("Cannot perform this operation in an AminoAcidRange object!");
203 return (-1);
204 }
205
206 sub valid {
207 my $self=shift;
208 $self->warn("Cannot perform this operation in an AminoAcidRange object!");
209 return (-1);
210 }
211
212 =head2 get_Transcript
213
214 Title : valid
215 Usage : $transcript = $obj->get_Transcript()
216 Function: retrieves the reference to the object of class Transcript (if any)
217 attached to a LiveSeq object
218 Returns : object reference
219 Args : none
220
221 =cut
222
223 sub get_Transcript {
224 my $self=shift;
225 return ($self->get_Translation->get_Transcript);
226 }
227
228 =head2 get_Translation
229
230 Title : valid
231 Usage : $translation = $obj->get_Translation()
232 Function: retrieves the reference to the object of class Translation (if any)
233 attached to a LiveSeq object
234 Returns : object reference
235 Args : none
236
237 =cut
238
239 sub get_Translation {
240 my $self=shift;
241 return ($self->{'translation'});
242 }
243
244 sub change {
245 my $self=shift;
246 $self->warn("Cannot change an AminoAcidRange object!");
247 return (-1);
248 }
249 sub positionchange {
250 my $self=shift;
251 $self->warn("Cannot change an AminoAcidRange object!");
252 return (-1);
253 }
254 sub labelchange {
255 my $self=shift;
256 $self->warn("Cannot change an AminoAcidRange object!");
257 return (-1);
258 }
259
260 sub subseq {
261 my ($self,$pos1,$pos2,$length) = @_;
262 if (defined ($length)) {
263 if ($length < 1) {
264 $self->warn("No sense asking for a subseq of length < 1");
265 return (-1);
266 }
267 }
268 unless (defined ($pos1)) {
269 $pos1=1;
270 } elsif ($pos1 < 1) { # if position out of boundaries
271 $self->warn("Starting position for AARange cannot be < 1!"); return (-1);
272 if ((defined ($pos2))&&($pos1>$pos2)) {
273 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
274 }
275 }
276 my $seq=$self->seq;
277 my $objlength=length($seq);
278 unless (defined ($length)) {
279 $length=$objlength-$pos1+1;
280 }
281 if (defined ($pos2)) {
282 if ($pos2 > $objlength) { # if position out of boundaries
283 $self->warn("Ending position for AARange cannot be > length of AARange!"); return (-1);
284 }
285 $length=$pos2-$pos1+1;
286 if ((defined ($pos1))&&($pos1>$pos2)) {
287 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
288 }
289 }
290 my $str=substr($seq,$pos1-1,$length);
291 if (length($str) < $length) {
292 $self->warn("Attention, cannot return the length requested for subseq",1);
293 }
294 return $str;
295 }
296
297 sub seq {
298 my $self=shift;
299 my ($aa_start,$aa_end)=($self->aa_start,$self->aa_end);
300 unless (($aa_start)&&($aa_end)) { # they must both exist
301 $self->warn("Not able to find start or end of the AminoAcid Range");
302 return (0);
303 }
304 my $translseq=$self->get_Translation->seq;
305 return substr($translseq,$aa_start-1,$aa_end-$aa_start+1);
306 # Note: it will return "undef" if the translation stops before the start
307 # of the aarange (because of upstream nonsense mutation creating STOP).
308 # For the same reason it would return uncomplete (up to the STOP) string
309 # if the stop happens in between aarange's start and stop
310 }
311
312 sub length {
313 my $self=shift;
314 my $seq=$self->seq;
315 my $length=length($seq);
316 return $length;
317 }
318
319 sub label {
320 my ($self,$position)=@_;
321 my $translation=$self->get_Translation;
322 my $origstart=$translation->coordinate_start; # preserve it
323 $translation->coordinate_start($self->start); # change it
324 my $label=$translation->label($position);
325 $translation->coordinate_start($origstart); # restore it
326 return ($label);
327 }
328
329 sub position {
330 my ($self,$label)=@_;
331 my $translation=$self->get_Translation;
332 my $origstart=$translation->coordinate_start; # preserve it
333 $translation->coordinate_start($self->start); # change it
334 my $position=$translation->position($label);
335 $translation->coordinate_start($origstart); # restore it
336 return ($position);
337 }
338
339 =head2 aa_start
340
341 Title : aa_start
342 Usage : $end = $aarange->aa_start()
343 Returns : integer (position, according to Translation coordinate system) of
344 the start of an AminoAcidRange object
345 Args : none
346
347 =cut
348
349 sub aa_start {
350 my $self=shift;
351 my $aastart=$self->get_Translation->position($self->{'start'});
352 }
353
354 =head2 aa_end
355
356 Title : aa_end
357 Usage : $end = $aarange->aa_end()
358 Returns : integer (position, according to Translation coordinate system) of
359 the end of an AminoAcidRange object
360 Args : none
361
362 =cut
363
364 sub aa_end {
365 my $self=shift;
366 my $aastart=$self->get_Translation->position($self->{'end'});
367 }
368
369 =head2 dna_seq
370
371 Title : dna_seq
372 Usage : $end = $aarange->dna_seq()
373 Returns : the sequence at DNA level of the entire AminoAcidRange
374 this would include introns (if present)
375 Args : none
376
377 =cut
378
379 sub dna_seq {
380 my $self=shift;
381 my $startlabel=$self->start;
382 my $endtripletlabel=$self->end;
383 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
384 return ($self->labelsubseq($startlabel,undef,$endlabel));
385 }
386
387 =head2 cdna_seq
388
389 Title : cdna_seq
390 Usage : $end = $aarange->cdna_seq()
391 Returns : the sequence at cDNA level of the entire AminoAcidRange
392 i.e. this is the part of the Transcript that codes for the
393 AminoAcidRange. It would be composed just of exonic DNA.
394 Args : none
395
396 =cut
397
398 sub cdna_seq {
399 my $self=shift;
400 my $startlabel=$self->start;
401 my $endtripletlabel=$self->end;
402 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
403 return ($self->get_Transcript->labelsubseq($startlabel,undef,$endlabel));
404 }
405
406 # this checks if the attached Transcript has a Gene object attached
407 sub gene {
408 my ($self,$value) = @_;
409 if (defined $value) {
410 $self->{'gene'} = $value;
411 }
412 unless (exists $self->{'gene'}) {
413 unless (exists $self->get_Transcript->{'gene'}) {
414 return (0);
415 } else {
416 return ($self->get_Transcript->{'gene'});
417 }
418 } else {
419 return $self->{'gene'};
420 }
421 }
422
423 1;