Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/LiveSeq/AARange.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: AARange.pm,v 1.10 2001/10/22 08:22:49 heikki Exp $ | |
2 # | |
3 # bioperl module for Bio::LiveSeq::AARange | |
4 # | |
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
6 # | |
7 # Copyright Joseph Insana | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::LiveSeq::AARange - AARange abstract class for LiveSeq | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 #documentation needed | |
20 | |
21 =head1 DESCRIPTION | |
22 | |
23 This is used as possible parent for aminoacid range object classes. | |
24 Or it can be used straight away to define aminoacid ranges. The idea | |
25 is that the ranges defined are attached to a Translation object and | |
26 they refer to its coordinate-system when they are first created (via | |
27 the new() method). When they are created they are anyway linked to | |
28 the underlying DNA LiveSeq by way of the LiveSeq labels. This allows | |
29 to preserve the ranges even if the numbering changes in the | |
30 Translation due to deletions or insertions. | |
31 | |
32 The protein sequence associated with the AARange can be accessed via | |
33 the usual seq() or subseq() methods. | |
34 | |
35 The start and end of the AARange in protein coordinate system can be | |
36 fetched with aa_start() and aa_end() methods. Note: the behaviour of | |
37 these methods would be influenced by the coordinate_start set in the | |
38 corresponding Translation object. This can be desirable but can also | |
39 lead to confusion if the coordinate_start had been changed and the | |
40 original position of the AARange was to be retrieved. | |
41 | |
42 start() and end() methods of the AARange will point to the labels | |
43 identifying the first nucleotide of the first and last triplet coding | |
44 for the start and end of the AminoAcidRange. | |
45 | |
46 The underlying nucleotide sequence of the AARange can be retrieved | |
47 with the labelsubseq() method. This would retrieve the whole DNA | |
48 sequence, including possible introns. This is called "DNA_sequence". | |
49 | |
50 To fetch the nucleotide sequence of the Transcript, without introns, | |
51 the labelsubseq() of the attached Transcript (the Transcript the | |
52 Translation comes from) has to be accessed. This is called | |
53 "cDNA_sequence". | |
54 | |
55 Here are the operations to retrieve these latter two kinds of | |
56 sequences: | |
57 | |
58 $startlabel=$AARange->start; | |
59 $endtripletlabel=$AARange->end; | |
60 $endlabel=$AARange->{'seq'}->label(3,$endtripletlabel,$AARange->strand); | |
61 | |
62 $dnaseq=$AARange->labelsubseq($startlabel,undef,$endlabel)); | |
63 | |
64 $cdnaseq=$AARange->get_Transcript->labelsubseq($startlabel,undef,$endlabel); | |
65 | |
66 To simplify, these operations have been included in two additional | |
67 methods: dna_seq() and cdna_seq(). | |
68 | |
69 These would return the whole sequence, as in the examples above. But | |
70 the above general scheme can be used by specifying different labels, | |
71 to retrieve hypothetical subsequences of interest. | |
72 | |
73 =head1 AUTHOR - Joseph A.L. Insana | |
74 | |
75 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
76 | |
77 Address: | |
78 | |
79 EMBL Outstation, European Bioinformatics Institute | |
80 Wellcome Trust Genome Campus, Hinxton | |
81 Cambs. CB10 1SD, United Kingdom | |
82 | |
83 =head1 APPENDIX | |
84 | |
85 The rest of the documentation details each of the object | |
86 methods. Internal methods are usually preceded with a _ | |
87 | |
88 =cut | |
89 | |
90 # Let the code begin... | |
91 | |
92 package Bio::LiveSeq::AARange; | |
93 $VERSION=1.8; | |
94 | |
95 # Version history: | |
96 # Wed Apr 19 15:10:29 BST 2000 v 1.0 begun | |
97 # Wed Apr 19 17:26:58 BST 2000 v 1.4 new, aa_start, aa_end, seq, length created | |
98 # Wed Apr 19 19:57:42 BST 2000 v 1.5 subseq position label added | |
99 # Thu Apr 20 16:13:58 BST 2000 v 1.7 added: documentation, dna_seq, cdna_seq | |
100 # Wed Mar 28 16:58:02 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI) | |
101 | |
102 use strict; | |
103 use vars qw($VERSION @ISA); | |
104 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it | |
105 @ISA=qw(Bio::LiveSeq::SeqI); | |
106 | |
107 =head2 new | |
108 | |
109 Title : new | |
110 Usage : $aarange = Bio::LiveSeq::AARange->new(-translation => $obj_ref, | |
111 -start => $beginaa, | |
112 -end => $endaa, | |
113 -name => "ABCD", | |
114 -description => "DCBA", | |
115 -translength => $length); | |
116 | |
117 Function: generates a new AminoAcidRange LiveSeq object | |
118 Returns : reference to a new object of class AARange | |
119 Errorcode -1 | |
120 Args : two positions in AminoAcid coordinate numbering | |
121 an object reference specifying to which translation the aminoacid | |
122 ranges refer to | |
123 a name and a description (optional) | |
124 an optional "translength" argument: this can be given when | |
125 a lot of AARanges are to be created at the same time for the same | |
126 Translation object, calculating it with $translation->length | |
127 This would increase the speed, avoiding the new() function to | |
128 calculate everytime the same length again and again for every obj. | |
129 | |
130 =cut | |
131 | |
132 sub new { | |
133 my ($thing, %args) = @_; | |
134 my $class = ref($thing) || $thing; | |
135 my ($obj,%range); | |
136 | |
137 $obj = \%range; | |
138 $obj = bless $obj, $class; | |
139 my $self=$obj; | |
140 | |
141 my ($translation,$start,$end,$name,$description,$translength)=($args{-translation},$args{-start},$args{-end},$args{-name},$args{-description},$args{-translength}); | |
142 | |
143 unless (($translation)&&(ref($translation) eq "Bio::LiveSeq::Translation")) { | |
144 $self->warn("No -translation or wrong type given"); | |
145 return (-1); | |
146 } | |
147 unless ($translength) { # if it's not given, fetch it | |
148 $translength=$translation->length; | |
149 } | |
150 my $seq=$translation->{'seq'}; | |
151 | |
152 if (($start < 1)&&($start > $translength)) { | |
153 $self->warn("$class not initialised because start aminoacid position not valid"); | |
154 return (-1); | |
155 } | |
156 if (($end < 1)&&($end > $translength)) { | |
157 $self->warn("$class not initialised because end aminoacid position not valid"); | |
158 return (-1); | |
159 } | |
160 if ($start > $end) { | |
161 $self->warn("$class not initialised because start position > end position!"); | |
162 return (-1); | |
163 } | |
164 | |
165 my ($starttripletlabel,$endtripletlabel); | |
166 if ($start == $end) { # trick to increase speed | |
167 $starttripletlabel=$endtripletlabel=$translation->label($start); | |
168 } else { | |
169 ($starttripletlabel,$endtripletlabel)=($translation->label($start),$translation->label($end)); | |
170 } | |
171 unless (($starttripletlabel > 0)&&($endtripletlabel > 0)) { | |
172 $self->warn("$class not initialised because of problems in retrieving start or end label!"); | |
173 return (-1); | |
174 } | |
175 | |
176 # unsure if needed: | |
177 #my $endlabel=$seq->label(3,$endtripletlabel); # to get the real end | |
178 #unless ($endlabel > 0) { | |
179 #carp "$class not initialised because of problems retrieving the last nucleotide of the triplet coding for the end aminoacid"; | |
180 #return (-1); | |
181 #} | |
182 $self->{'seq'}=$seq; | |
183 $self->{'start'}=$starttripletlabel; | |
184 $self->{'end'}=$endtripletlabel; | |
185 $self->{'strand'}=$translation->strand; | |
186 $self->{'translation'}=$translation; | |
187 $self->{'name'}=$name; | |
188 $self->{'description'}=$description; | |
189 $self->{'alphabet'}="protein"; | |
190 | |
191 return $obj; | |
192 } | |
193 | |
194 sub coordinate_start { | |
195 my $self=shift; | |
196 $self->warn("Cannot perform this operation in an AminoAcidRange object!"); | |
197 return (-1); | |
198 } | |
199 | |
200 sub all_labels { | |
201 my $self=shift; | |
202 $self->warn("Cannot perform this operation in an AminoAcidRange object!"); | |
203 return (-1); | |
204 } | |
205 | |
206 sub valid { | |
207 my $self=shift; | |
208 $self->warn("Cannot perform this operation in an AminoAcidRange object!"); | |
209 return (-1); | |
210 } | |
211 | |
212 =head2 get_Transcript | |
213 | |
214 Title : valid | |
215 Usage : $transcript = $obj->get_Transcript() | |
216 Function: retrieves the reference to the object of class Transcript (if any) | |
217 attached to a LiveSeq object | |
218 Returns : object reference | |
219 Args : none | |
220 | |
221 =cut | |
222 | |
223 sub get_Transcript { | |
224 my $self=shift; | |
225 return ($self->get_Translation->get_Transcript); | |
226 } | |
227 | |
228 =head2 get_Translation | |
229 | |
230 Title : valid | |
231 Usage : $translation = $obj->get_Translation() | |
232 Function: retrieves the reference to the object of class Translation (if any) | |
233 attached to a LiveSeq object | |
234 Returns : object reference | |
235 Args : none | |
236 | |
237 =cut | |
238 | |
239 sub get_Translation { | |
240 my $self=shift; | |
241 return ($self->{'translation'}); | |
242 } | |
243 | |
244 sub change { | |
245 my $self=shift; | |
246 $self->warn("Cannot change an AminoAcidRange object!"); | |
247 return (-1); | |
248 } | |
249 sub positionchange { | |
250 my $self=shift; | |
251 $self->warn("Cannot change an AminoAcidRange object!"); | |
252 return (-1); | |
253 } | |
254 sub labelchange { | |
255 my $self=shift; | |
256 $self->warn("Cannot change an AminoAcidRange object!"); | |
257 return (-1); | |
258 } | |
259 | |
260 sub subseq { | |
261 my ($self,$pos1,$pos2,$length) = @_; | |
262 if (defined ($length)) { | |
263 if ($length < 1) { | |
264 $self->warn("No sense asking for a subseq of length < 1"); | |
265 return (-1); | |
266 } | |
267 } | |
268 unless (defined ($pos1)) { | |
269 $pos1=1; | |
270 } elsif ($pos1 < 1) { # if position out of boundaries | |
271 $self->warn("Starting position for AARange cannot be < 1!"); return (-1); | |
272 if ((defined ($pos2))&&($pos1>$pos2)) { | |
273 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
274 } | |
275 } | |
276 my $seq=$self->seq; | |
277 my $objlength=length($seq); | |
278 unless (defined ($length)) { | |
279 $length=$objlength-$pos1+1; | |
280 } | |
281 if (defined ($pos2)) { | |
282 if ($pos2 > $objlength) { # if position out of boundaries | |
283 $self->warn("Ending position for AARange cannot be > length of AARange!"); return (-1); | |
284 } | |
285 $length=$pos2-$pos1+1; | |
286 if ((defined ($pos1))&&($pos1>$pos2)) { | |
287 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
288 } | |
289 } | |
290 my $str=substr($seq,$pos1-1,$length); | |
291 if (length($str) < $length) { | |
292 $self->warn("Attention, cannot return the length requested for subseq",1); | |
293 } | |
294 return $str; | |
295 } | |
296 | |
297 sub seq { | |
298 my $self=shift; | |
299 my ($aa_start,$aa_end)=($self->aa_start,$self->aa_end); | |
300 unless (($aa_start)&&($aa_end)) { # they must both exist | |
301 $self->warn("Not able to find start or end of the AminoAcid Range"); | |
302 return (0); | |
303 } | |
304 my $translseq=$self->get_Translation->seq; | |
305 return substr($translseq,$aa_start-1,$aa_end-$aa_start+1); | |
306 # Note: it will return "undef" if the translation stops before the start | |
307 # of the aarange (because of upstream nonsense mutation creating STOP). | |
308 # For the same reason it would return uncomplete (up to the STOP) string | |
309 # if the stop happens in between aarange's start and stop | |
310 } | |
311 | |
312 sub length { | |
313 my $self=shift; | |
314 my $seq=$self->seq; | |
315 my $length=length($seq); | |
316 return $length; | |
317 } | |
318 | |
319 sub label { | |
320 my ($self,$position)=@_; | |
321 my $translation=$self->get_Translation; | |
322 my $origstart=$translation->coordinate_start; # preserve it | |
323 $translation->coordinate_start($self->start); # change it | |
324 my $label=$translation->label($position); | |
325 $translation->coordinate_start($origstart); # restore it | |
326 return ($label); | |
327 } | |
328 | |
329 sub position { | |
330 my ($self,$label)=@_; | |
331 my $translation=$self->get_Translation; | |
332 my $origstart=$translation->coordinate_start; # preserve it | |
333 $translation->coordinate_start($self->start); # change it | |
334 my $position=$translation->position($label); | |
335 $translation->coordinate_start($origstart); # restore it | |
336 return ($position); | |
337 } | |
338 | |
339 =head2 aa_start | |
340 | |
341 Title : aa_start | |
342 Usage : $end = $aarange->aa_start() | |
343 Returns : integer (position, according to Translation coordinate system) of | |
344 the start of an AminoAcidRange object | |
345 Args : none | |
346 | |
347 =cut | |
348 | |
349 sub aa_start { | |
350 my $self=shift; | |
351 my $aastart=$self->get_Translation->position($self->{'start'}); | |
352 } | |
353 | |
354 =head2 aa_end | |
355 | |
356 Title : aa_end | |
357 Usage : $end = $aarange->aa_end() | |
358 Returns : integer (position, according to Translation coordinate system) of | |
359 the end of an AminoAcidRange object | |
360 Args : none | |
361 | |
362 =cut | |
363 | |
364 sub aa_end { | |
365 my $self=shift; | |
366 my $aastart=$self->get_Translation->position($self->{'end'}); | |
367 } | |
368 | |
369 =head2 dna_seq | |
370 | |
371 Title : dna_seq | |
372 Usage : $end = $aarange->dna_seq() | |
373 Returns : the sequence at DNA level of the entire AminoAcidRange | |
374 this would include introns (if present) | |
375 Args : none | |
376 | |
377 =cut | |
378 | |
379 sub dna_seq { | |
380 my $self=shift; | |
381 my $startlabel=$self->start; | |
382 my $endtripletlabel=$self->end; | |
383 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand); | |
384 return ($self->labelsubseq($startlabel,undef,$endlabel)); | |
385 } | |
386 | |
387 =head2 cdna_seq | |
388 | |
389 Title : cdna_seq | |
390 Usage : $end = $aarange->cdna_seq() | |
391 Returns : the sequence at cDNA level of the entire AminoAcidRange | |
392 i.e. this is the part of the Transcript that codes for the | |
393 AminoAcidRange. It would be composed just of exonic DNA. | |
394 Args : none | |
395 | |
396 =cut | |
397 | |
398 sub cdna_seq { | |
399 my $self=shift; | |
400 my $startlabel=$self->start; | |
401 my $endtripletlabel=$self->end; | |
402 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand); | |
403 return ($self->get_Transcript->labelsubseq($startlabel,undef,$endlabel)); | |
404 } | |
405 | |
406 # this checks if the attached Transcript has a Gene object attached | |
407 sub gene { | |
408 my ($self,$value) = @_; | |
409 if (defined $value) { | |
410 $self->{'gene'} = $value; | |
411 } | |
412 unless (exists $self->{'gene'}) { | |
413 unless (exists $self->get_Transcript->{'gene'}) { | |
414 return (0); | |
415 } else { | |
416 return ($self->get_Transcript->{'gene'}); | |
417 } | |
418 } else { | |
419 return $self->{'gene'}; | |
420 } | |
421 } | |
422 | |
423 1; |