0
|
1 # $Id: AARange.pm,v 1.10 2001/10/22 08:22:49 heikki Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::LiveSeq::AARange
|
|
4 #
|
|
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
|
|
6 #
|
|
7 # Copyright Joseph Insana
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10 #
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::LiveSeq::AARange - AARange abstract class for LiveSeq
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 #documentation needed
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 This is used as possible parent for aminoacid range object classes.
|
|
24 Or it can be used straight away to define aminoacid ranges. The idea
|
|
25 is that the ranges defined are attached to a Translation object and
|
|
26 they refer to its coordinate-system when they are first created (via
|
|
27 the new() method). When they are created they are anyway linked to
|
|
28 the underlying DNA LiveSeq by way of the LiveSeq labels. This allows
|
|
29 to preserve the ranges even if the numbering changes in the
|
|
30 Translation due to deletions or insertions.
|
|
31
|
|
32 The protein sequence associated with the AARange can be accessed via
|
|
33 the usual seq() or subseq() methods.
|
|
34
|
|
35 The start and end of the AARange in protein coordinate system can be
|
|
36 fetched with aa_start() and aa_end() methods. Note: the behaviour of
|
|
37 these methods would be influenced by the coordinate_start set in the
|
|
38 corresponding Translation object. This can be desirable but can also
|
|
39 lead to confusion if the coordinate_start had been changed and the
|
|
40 original position of the AARange was to be retrieved.
|
|
41
|
|
42 start() and end() methods of the AARange will point to the labels
|
|
43 identifying the first nucleotide of the first and last triplet coding
|
|
44 for the start and end of the AminoAcidRange.
|
|
45
|
|
46 The underlying nucleotide sequence of the AARange can be retrieved
|
|
47 with the labelsubseq() method. This would retrieve the whole DNA
|
|
48 sequence, including possible introns. This is called "DNA_sequence".
|
|
49
|
|
50 To fetch the nucleotide sequence of the Transcript, without introns,
|
|
51 the labelsubseq() of the attached Transcript (the Transcript the
|
|
52 Translation comes from) has to be accessed. This is called
|
|
53 "cDNA_sequence".
|
|
54
|
|
55 Here are the operations to retrieve these latter two kinds of
|
|
56 sequences:
|
|
57
|
|
58 $startlabel=$AARange->start;
|
|
59 $endtripletlabel=$AARange->end;
|
|
60 $endlabel=$AARange->{'seq'}->label(3,$endtripletlabel,$AARange->strand);
|
|
61
|
|
62 $dnaseq=$AARange->labelsubseq($startlabel,undef,$endlabel));
|
|
63
|
|
64 $cdnaseq=$AARange->get_Transcript->labelsubseq($startlabel,undef,$endlabel);
|
|
65
|
|
66 To simplify, these operations have been included in two additional
|
|
67 methods: dna_seq() and cdna_seq().
|
|
68
|
|
69 These would return the whole sequence, as in the examples above. But
|
|
70 the above general scheme can be used by specifying different labels,
|
|
71 to retrieve hypothetical subsequences of interest.
|
|
72
|
|
73 =head1 AUTHOR - Joseph A.L. Insana
|
|
74
|
|
75 Email: Insana@ebi.ac.uk, jinsana@gmx.net
|
|
76
|
|
77 Address:
|
|
78
|
|
79 EMBL Outstation, European Bioinformatics Institute
|
|
80 Wellcome Trust Genome Campus, Hinxton
|
|
81 Cambs. CB10 1SD, United Kingdom
|
|
82
|
|
83 =head1 APPENDIX
|
|
84
|
|
85 The rest of the documentation details each of the object
|
|
86 methods. Internal methods are usually preceded with a _
|
|
87
|
|
88 =cut
|
|
89
|
|
90 # Let the code begin...
|
|
91
|
|
92 package Bio::LiveSeq::AARange;
|
|
93 $VERSION=1.8;
|
|
94
|
|
95 # Version history:
|
|
96 # Wed Apr 19 15:10:29 BST 2000 v 1.0 begun
|
|
97 # Wed Apr 19 17:26:58 BST 2000 v 1.4 new, aa_start, aa_end, seq, length created
|
|
98 # Wed Apr 19 19:57:42 BST 2000 v 1.5 subseq position label added
|
|
99 # Thu Apr 20 16:13:58 BST 2000 v 1.7 added: documentation, dna_seq, cdna_seq
|
|
100 # Wed Mar 28 16:58:02 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI)
|
|
101
|
|
102 use strict;
|
|
103 use vars qw($VERSION @ISA);
|
|
104 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it
|
|
105 @ISA=qw(Bio::LiveSeq::SeqI);
|
|
106
|
|
107 =head2 new
|
|
108
|
|
109 Title : new
|
|
110 Usage : $aarange = Bio::LiveSeq::AARange->new(-translation => $obj_ref,
|
|
111 -start => $beginaa,
|
|
112 -end => $endaa,
|
|
113 -name => "ABCD",
|
|
114 -description => "DCBA",
|
|
115 -translength => $length);
|
|
116
|
|
117 Function: generates a new AminoAcidRange LiveSeq object
|
|
118 Returns : reference to a new object of class AARange
|
|
119 Errorcode -1
|
|
120 Args : two positions in AminoAcid coordinate numbering
|
|
121 an object reference specifying to which translation the aminoacid
|
|
122 ranges refer to
|
|
123 a name and a description (optional)
|
|
124 an optional "translength" argument: this can be given when
|
|
125 a lot of AARanges are to be created at the same time for the same
|
|
126 Translation object, calculating it with $translation->length
|
|
127 This would increase the speed, avoiding the new() function to
|
|
128 calculate everytime the same length again and again for every obj.
|
|
129
|
|
130 =cut
|
|
131
|
|
132 sub new {
|
|
133 my ($thing, %args) = @_;
|
|
134 my $class = ref($thing) || $thing;
|
|
135 my ($obj,%range);
|
|
136
|
|
137 $obj = \%range;
|
|
138 $obj = bless $obj, $class;
|
|
139 my $self=$obj;
|
|
140
|
|
141 my ($translation,$start,$end,$name,$description,$translength)=($args{-translation},$args{-start},$args{-end},$args{-name},$args{-description},$args{-translength});
|
|
142
|
|
143 unless (($translation)&&(ref($translation) eq "Bio::LiveSeq::Translation")) {
|
|
144 $self->warn("No -translation or wrong type given");
|
|
145 return (-1);
|
|
146 }
|
|
147 unless ($translength) { # if it's not given, fetch it
|
|
148 $translength=$translation->length;
|
|
149 }
|
|
150 my $seq=$translation->{'seq'};
|
|
151
|
|
152 if (($start < 1)&&($start > $translength)) {
|
|
153 $self->warn("$class not initialised because start aminoacid position not valid");
|
|
154 return (-1);
|
|
155 }
|
|
156 if (($end < 1)&&($end > $translength)) {
|
|
157 $self->warn("$class not initialised because end aminoacid position not valid");
|
|
158 return (-1);
|
|
159 }
|
|
160 if ($start > $end) {
|
|
161 $self->warn("$class not initialised because start position > end position!");
|
|
162 return (-1);
|
|
163 }
|
|
164
|
|
165 my ($starttripletlabel,$endtripletlabel);
|
|
166 if ($start == $end) { # trick to increase speed
|
|
167 $starttripletlabel=$endtripletlabel=$translation->label($start);
|
|
168 } else {
|
|
169 ($starttripletlabel,$endtripletlabel)=($translation->label($start),$translation->label($end));
|
|
170 }
|
|
171 unless (($starttripletlabel > 0)&&($endtripletlabel > 0)) {
|
|
172 $self->warn("$class not initialised because of problems in retrieving start or end label!");
|
|
173 return (-1);
|
|
174 }
|
|
175
|
|
176 # unsure if needed:
|
|
177 #my $endlabel=$seq->label(3,$endtripletlabel); # to get the real end
|
|
178 #unless ($endlabel > 0) {
|
|
179 #carp "$class not initialised because of problems retrieving the last nucleotide of the triplet coding for the end aminoacid";
|
|
180 #return (-1);
|
|
181 #}
|
|
182 $self->{'seq'}=$seq;
|
|
183 $self->{'start'}=$starttripletlabel;
|
|
184 $self->{'end'}=$endtripletlabel;
|
|
185 $self->{'strand'}=$translation->strand;
|
|
186 $self->{'translation'}=$translation;
|
|
187 $self->{'name'}=$name;
|
|
188 $self->{'description'}=$description;
|
|
189 $self->{'alphabet'}="protein";
|
|
190
|
|
191 return $obj;
|
|
192 }
|
|
193
|
|
194 sub coordinate_start {
|
|
195 my $self=shift;
|
|
196 $self->warn("Cannot perform this operation in an AminoAcidRange object!");
|
|
197 return (-1);
|
|
198 }
|
|
199
|
|
200 sub all_labels {
|
|
201 my $self=shift;
|
|
202 $self->warn("Cannot perform this operation in an AminoAcidRange object!");
|
|
203 return (-1);
|
|
204 }
|
|
205
|
|
206 sub valid {
|
|
207 my $self=shift;
|
|
208 $self->warn("Cannot perform this operation in an AminoAcidRange object!");
|
|
209 return (-1);
|
|
210 }
|
|
211
|
|
212 =head2 get_Transcript
|
|
213
|
|
214 Title : valid
|
|
215 Usage : $transcript = $obj->get_Transcript()
|
|
216 Function: retrieves the reference to the object of class Transcript (if any)
|
|
217 attached to a LiveSeq object
|
|
218 Returns : object reference
|
|
219 Args : none
|
|
220
|
|
221 =cut
|
|
222
|
|
223 sub get_Transcript {
|
|
224 my $self=shift;
|
|
225 return ($self->get_Translation->get_Transcript);
|
|
226 }
|
|
227
|
|
228 =head2 get_Translation
|
|
229
|
|
230 Title : valid
|
|
231 Usage : $translation = $obj->get_Translation()
|
|
232 Function: retrieves the reference to the object of class Translation (if any)
|
|
233 attached to a LiveSeq object
|
|
234 Returns : object reference
|
|
235 Args : none
|
|
236
|
|
237 =cut
|
|
238
|
|
239 sub get_Translation {
|
|
240 my $self=shift;
|
|
241 return ($self->{'translation'});
|
|
242 }
|
|
243
|
|
244 sub change {
|
|
245 my $self=shift;
|
|
246 $self->warn("Cannot change an AminoAcidRange object!");
|
|
247 return (-1);
|
|
248 }
|
|
249 sub positionchange {
|
|
250 my $self=shift;
|
|
251 $self->warn("Cannot change an AminoAcidRange object!");
|
|
252 return (-1);
|
|
253 }
|
|
254 sub labelchange {
|
|
255 my $self=shift;
|
|
256 $self->warn("Cannot change an AminoAcidRange object!");
|
|
257 return (-1);
|
|
258 }
|
|
259
|
|
260 sub subseq {
|
|
261 my ($self,$pos1,$pos2,$length) = @_;
|
|
262 if (defined ($length)) {
|
|
263 if ($length < 1) {
|
|
264 $self->warn("No sense asking for a subseq of length < 1");
|
|
265 return (-1);
|
|
266 }
|
|
267 }
|
|
268 unless (defined ($pos1)) {
|
|
269 $pos1=1;
|
|
270 } elsif ($pos1 < 1) { # if position out of boundaries
|
|
271 $self->warn("Starting position for AARange cannot be < 1!"); return (-1);
|
|
272 if ((defined ($pos2))&&($pos1>$pos2)) {
|
|
273 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
|
|
274 }
|
|
275 }
|
|
276 my $seq=$self->seq;
|
|
277 my $objlength=length($seq);
|
|
278 unless (defined ($length)) {
|
|
279 $length=$objlength-$pos1+1;
|
|
280 }
|
|
281 if (defined ($pos2)) {
|
|
282 if ($pos2 > $objlength) { # if position out of boundaries
|
|
283 $self->warn("Ending position for AARange cannot be > length of AARange!"); return (-1);
|
|
284 }
|
|
285 $length=$pos2-$pos1+1;
|
|
286 if ((defined ($pos1))&&($pos1>$pos2)) {
|
|
287 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
|
|
288 }
|
|
289 }
|
|
290 my $str=substr($seq,$pos1-1,$length);
|
|
291 if (length($str) < $length) {
|
|
292 $self->warn("Attention, cannot return the length requested for subseq",1);
|
|
293 }
|
|
294 return $str;
|
|
295 }
|
|
296
|
|
297 sub seq {
|
|
298 my $self=shift;
|
|
299 my ($aa_start,$aa_end)=($self->aa_start,$self->aa_end);
|
|
300 unless (($aa_start)&&($aa_end)) { # they must both exist
|
|
301 $self->warn("Not able to find start or end of the AminoAcid Range");
|
|
302 return (0);
|
|
303 }
|
|
304 my $translseq=$self->get_Translation->seq;
|
|
305 return substr($translseq,$aa_start-1,$aa_end-$aa_start+1);
|
|
306 # Note: it will return "undef" if the translation stops before the start
|
|
307 # of the aarange (because of upstream nonsense mutation creating STOP).
|
|
308 # For the same reason it would return uncomplete (up to the STOP) string
|
|
309 # if the stop happens in between aarange's start and stop
|
|
310 }
|
|
311
|
|
312 sub length {
|
|
313 my $self=shift;
|
|
314 my $seq=$self->seq;
|
|
315 my $length=length($seq);
|
|
316 return $length;
|
|
317 }
|
|
318
|
|
319 sub label {
|
|
320 my ($self,$position)=@_;
|
|
321 my $translation=$self->get_Translation;
|
|
322 my $origstart=$translation->coordinate_start; # preserve it
|
|
323 $translation->coordinate_start($self->start); # change it
|
|
324 my $label=$translation->label($position);
|
|
325 $translation->coordinate_start($origstart); # restore it
|
|
326 return ($label);
|
|
327 }
|
|
328
|
|
329 sub position {
|
|
330 my ($self,$label)=@_;
|
|
331 my $translation=$self->get_Translation;
|
|
332 my $origstart=$translation->coordinate_start; # preserve it
|
|
333 $translation->coordinate_start($self->start); # change it
|
|
334 my $position=$translation->position($label);
|
|
335 $translation->coordinate_start($origstart); # restore it
|
|
336 return ($position);
|
|
337 }
|
|
338
|
|
339 =head2 aa_start
|
|
340
|
|
341 Title : aa_start
|
|
342 Usage : $end = $aarange->aa_start()
|
|
343 Returns : integer (position, according to Translation coordinate system) of
|
|
344 the start of an AminoAcidRange object
|
|
345 Args : none
|
|
346
|
|
347 =cut
|
|
348
|
|
349 sub aa_start {
|
|
350 my $self=shift;
|
|
351 my $aastart=$self->get_Translation->position($self->{'start'});
|
|
352 }
|
|
353
|
|
354 =head2 aa_end
|
|
355
|
|
356 Title : aa_end
|
|
357 Usage : $end = $aarange->aa_end()
|
|
358 Returns : integer (position, according to Translation coordinate system) of
|
|
359 the end of an AminoAcidRange object
|
|
360 Args : none
|
|
361
|
|
362 =cut
|
|
363
|
|
364 sub aa_end {
|
|
365 my $self=shift;
|
|
366 my $aastart=$self->get_Translation->position($self->{'end'});
|
|
367 }
|
|
368
|
|
369 =head2 dna_seq
|
|
370
|
|
371 Title : dna_seq
|
|
372 Usage : $end = $aarange->dna_seq()
|
|
373 Returns : the sequence at DNA level of the entire AminoAcidRange
|
|
374 this would include introns (if present)
|
|
375 Args : none
|
|
376
|
|
377 =cut
|
|
378
|
|
379 sub dna_seq {
|
|
380 my $self=shift;
|
|
381 my $startlabel=$self->start;
|
|
382 my $endtripletlabel=$self->end;
|
|
383 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
|
|
384 return ($self->labelsubseq($startlabel,undef,$endlabel));
|
|
385 }
|
|
386
|
|
387 =head2 cdna_seq
|
|
388
|
|
389 Title : cdna_seq
|
|
390 Usage : $end = $aarange->cdna_seq()
|
|
391 Returns : the sequence at cDNA level of the entire AminoAcidRange
|
|
392 i.e. this is the part of the Transcript that codes for the
|
|
393 AminoAcidRange. It would be composed just of exonic DNA.
|
|
394 Args : none
|
|
395
|
|
396 =cut
|
|
397
|
|
398 sub cdna_seq {
|
|
399 my $self=shift;
|
|
400 my $startlabel=$self->start;
|
|
401 my $endtripletlabel=$self->end;
|
|
402 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
|
|
403 return ($self->get_Transcript->labelsubseq($startlabel,undef,$endlabel));
|
|
404 }
|
|
405
|
|
406 # this checks if the attached Transcript has a Gene object attached
|
|
407 sub gene {
|
|
408 my ($self,$value) = @_;
|
|
409 if (defined $value) {
|
|
410 $self->{'gene'} = $value;
|
|
411 }
|
|
412 unless (exists $self->{'gene'}) {
|
|
413 unless (exists $self->get_Transcript->{'gene'}) {
|
|
414 return (0);
|
|
415 } else {
|
|
416 return ($self->get_Transcript->{'gene'});
|
|
417 }
|
|
418 } else {
|
|
419 return $self->{'gene'};
|
|
420 }
|
|
421 }
|
|
422
|
|
423 1;
|