Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/AARange.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: AARange.pm,v 1.10 2001/10/22 08:22:49 heikki Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::LiveSeq::AARange | |
| 4 # | |
| 5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
| 6 # | |
| 7 # Copyright Joseph Insana | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::LiveSeq::AARange - AARange abstract class for LiveSeq | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 #documentation needed | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This is used as possible parent for aminoacid range object classes. | |
| 24 Or it can be used straight away to define aminoacid ranges. The idea | |
| 25 is that the ranges defined are attached to a Translation object and | |
| 26 they refer to its coordinate-system when they are first created (via | |
| 27 the new() method). When they are created they are anyway linked to | |
| 28 the underlying DNA LiveSeq by way of the LiveSeq labels. This allows | |
| 29 to preserve the ranges even if the numbering changes in the | |
| 30 Translation due to deletions or insertions. | |
| 31 | |
| 32 The protein sequence associated with the AARange can be accessed via | |
| 33 the usual seq() or subseq() methods. | |
| 34 | |
| 35 The start and end of the AARange in protein coordinate system can be | |
| 36 fetched with aa_start() and aa_end() methods. Note: the behaviour of | |
| 37 these methods would be influenced by the coordinate_start set in the | |
| 38 corresponding Translation object. This can be desirable but can also | |
| 39 lead to confusion if the coordinate_start had been changed and the | |
| 40 original position of the AARange was to be retrieved. | |
| 41 | |
| 42 start() and end() methods of the AARange will point to the labels | |
| 43 identifying the first nucleotide of the first and last triplet coding | |
| 44 for the start and end of the AminoAcidRange. | |
| 45 | |
| 46 The underlying nucleotide sequence of the AARange can be retrieved | |
| 47 with the labelsubseq() method. This would retrieve the whole DNA | |
| 48 sequence, including possible introns. This is called "DNA_sequence". | |
| 49 | |
| 50 To fetch the nucleotide sequence of the Transcript, without introns, | |
| 51 the labelsubseq() of the attached Transcript (the Transcript the | |
| 52 Translation comes from) has to be accessed. This is called | |
| 53 "cDNA_sequence". | |
| 54 | |
| 55 Here are the operations to retrieve these latter two kinds of | |
| 56 sequences: | |
| 57 | |
| 58 $startlabel=$AARange->start; | |
| 59 $endtripletlabel=$AARange->end; | |
| 60 $endlabel=$AARange->{'seq'}->label(3,$endtripletlabel,$AARange->strand); | |
| 61 | |
| 62 $dnaseq=$AARange->labelsubseq($startlabel,undef,$endlabel)); | |
| 63 | |
| 64 $cdnaseq=$AARange->get_Transcript->labelsubseq($startlabel,undef,$endlabel); | |
| 65 | |
| 66 To simplify, these operations have been included in two additional | |
| 67 methods: dna_seq() and cdna_seq(). | |
| 68 | |
| 69 These would return the whole sequence, as in the examples above. But | |
| 70 the above general scheme can be used by specifying different labels, | |
| 71 to retrieve hypothetical subsequences of interest. | |
| 72 | |
| 73 =head1 AUTHOR - Joseph A.L. Insana | |
| 74 | |
| 75 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
| 76 | |
| 77 Address: | |
| 78 | |
| 79 EMBL Outstation, European Bioinformatics Institute | |
| 80 Wellcome Trust Genome Campus, Hinxton | |
| 81 Cambs. CB10 1SD, United Kingdom | |
| 82 | |
| 83 =head1 APPENDIX | |
| 84 | |
| 85 The rest of the documentation details each of the object | |
| 86 methods. Internal methods are usually preceded with a _ | |
| 87 | |
| 88 =cut | |
| 89 | |
| 90 # Let the code begin... | |
| 91 | |
| 92 package Bio::LiveSeq::AARange; | |
| 93 $VERSION=1.8; | |
| 94 | |
| 95 # Version history: | |
| 96 # Wed Apr 19 15:10:29 BST 2000 v 1.0 begun | |
| 97 # Wed Apr 19 17:26:58 BST 2000 v 1.4 new, aa_start, aa_end, seq, length created | |
| 98 # Wed Apr 19 19:57:42 BST 2000 v 1.5 subseq position label added | |
| 99 # Thu Apr 20 16:13:58 BST 2000 v 1.7 added: documentation, dna_seq, cdna_seq | |
| 100 # Wed Mar 28 16:58:02 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI) | |
| 101 | |
| 102 use strict; | |
| 103 use vars qw($VERSION @ISA); | |
| 104 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it | |
| 105 @ISA=qw(Bio::LiveSeq::SeqI); | |
| 106 | |
| 107 =head2 new | |
| 108 | |
| 109 Title : new | |
| 110 Usage : $aarange = Bio::LiveSeq::AARange->new(-translation => $obj_ref, | |
| 111 -start => $beginaa, | |
| 112 -end => $endaa, | |
| 113 -name => "ABCD", | |
| 114 -description => "DCBA", | |
| 115 -translength => $length); | |
| 116 | |
| 117 Function: generates a new AminoAcidRange LiveSeq object | |
| 118 Returns : reference to a new object of class AARange | |
| 119 Errorcode -1 | |
| 120 Args : two positions in AminoAcid coordinate numbering | |
| 121 an object reference specifying to which translation the aminoacid | |
| 122 ranges refer to | |
| 123 a name and a description (optional) | |
| 124 an optional "translength" argument: this can be given when | |
| 125 a lot of AARanges are to be created at the same time for the same | |
| 126 Translation object, calculating it with $translation->length | |
| 127 This would increase the speed, avoiding the new() function to | |
| 128 calculate everytime the same length again and again for every obj. | |
| 129 | |
| 130 =cut | |
| 131 | |
| 132 sub new { | |
| 133 my ($thing, %args) = @_; | |
| 134 my $class = ref($thing) || $thing; | |
| 135 my ($obj,%range); | |
| 136 | |
| 137 $obj = \%range; | |
| 138 $obj = bless $obj, $class; | |
| 139 my $self=$obj; | |
| 140 | |
| 141 my ($translation,$start,$end,$name,$description,$translength)=($args{-translation},$args{-start},$args{-end},$args{-name},$args{-description},$args{-translength}); | |
| 142 | |
| 143 unless (($translation)&&(ref($translation) eq "Bio::LiveSeq::Translation")) { | |
| 144 $self->warn("No -translation or wrong type given"); | |
| 145 return (-1); | |
| 146 } | |
| 147 unless ($translength) { # if it's not given, fetch it | |
| 148 $translength=$translation->length; | |
| 149 } | |
| 150 my $seq=$translation->{'seq'}; | |
| 151 | |
| 152 if (($start < 1)&&($start > $translength)) { | |
| 153 $self->warn("$class not initialised because start aminoacid position not valid"); | |
| 154 return (-1); | |
| 155 } | |
| 156 if (($end < 1)&&($end > $translength)) { | |
| 157 $self->warn("$class not initialised because end aminoacid position not valid"); | |
| 158 return (-1); | |
| 159 } | |
| 160 if ($start > $end) { | |
| 161 $self->warn("$class not initialised because start position > end position!"); | |
| 162 return (-1); | |
| 163 } | |
| 164 | |
| 165 my ($starttripletlabel,$endtripletlabel); | |
| 166 if ($start == $end) { # trick to increase speed | |
| 167 $starttripletlabel=$endtripletlabel=$translation->label($start); | |
| 168 } else { | |
| 169 ($starttripletlabel,$endtripletlabel)=($translation->label($start),$translation->label($end)); | |
| 170 } | |
| 171 unless (($starttripletlabel > 0)&&($endtripletlabel > 0)) { | |
| 172 $self->warn("$class not initialised because of problems in retrieving start or end label!"); | |
| 173 return (-1); | |
| 174 } | |
| 175 | |
| 176 # unsure if needed: | |
| 177 #my $endlabel=$seq->label(3,$endtripletlabel); # to get the real end | |
| 178 #unless ($endlabel > 0) { | |
| 179 #carp "$class not initialised because of problems retrieving the last nucleotide of the triplet coding for the end aminoacid"; | |
| 180 #return (-1); | |
| 181 #} | |
| 182 $self->{'seq'}=$seq; | |
| 183 $self->{'start'}=$starttripletlabel; | |
| 184 $self->{'end'}=$endtripletlabel; | |
| 185 $self->{'strand'}=$translation->strand; | |
| 186 $self->{'translation'}=$translation; | |
| 187 $self->{'name'}=$name; | |
| 188 $self->{'description'}=$description; | |
| 189 $self->{'alphabet'}="protein"; | |
| 190 | |
| 191 return $obj; | |
| 192 } | |
| 193 | |
| 194 sub coordinate_start { | |
| 195 my $self=shift; | |
| 196 $self->warn("Cannot perform this operation in an AminoAcidRange object!"); | |
| 197 return (-1); | |
| 198 } | |
| 199 | |
| 200 sub all_labels { | |
| 201 my $self=shift; | |
| 202 $self->warn("Cannot perform this operation in an AminoAcidRange object!"); | |
| 203 return (-1); | |
| 204 } | |
| 205 | |
| 206 sub valid { | |
| 207 my $self=shift; | |
| 208 $self->warn("Cannot perform this operation in an AminoAcidRange object!"); | |
| 209 return (-1); | |
| 210 } | |
| 211 | |
| 212 =head2 get_Transcript | |
| 213 | |
| 214 Title : valid | |
| 215 Usage : $transcript = $obj->get_Transcript() | |
| 216 Function: retrieves the reference to the object of class Transcript (if any) | |
| 217 attached to a LiveSeq object | |
| 218 Returns : object reference | |
| 219 Args : none | |
| 220 | |
| 221 =cut | |
| 222 | |
| 223 sub get_Transcript { | |
| 224 my $self=shift; | |
| 225 return ($self->get_Translation->get_Transcript); | |
| 226 } | |
| 227 | |
| 228 =head2 get_Translation | |
| 229 | |
| 230 Title : valid | |
| 231 Usage : $translation = $obj->get_Translation() | |
| 232 Function: retrieves the reference to the object of class Translation (if any) | |
| 233 attached to a LiveSeq object | |
| 234 Returns : object reference | |
| 235 Args : none | |
| 236 | |
| 237 =cut | |
| 238 | |
| 239 sub get_Translation { | |
| 240 my $self=shift; | |
| 241 return ($self->{'translation'}); | |
| 242 } | |
| 243 | |
| 244 sub change { | |
| 245 my $self=shift; | |
| 246 $self->warn("Cannot change an AminoAcidRange object!"); | |
| 247 return (-1); | |
| 248 } | |
| 249 sub positionchange { | |
| 250 my $self=shift; | |
| 251 $self->warn("Cannot change an AminoAcidRange object!"); | |
| 252 return (-1); | |
| 253 } | |
| 254 sub labelchange { | |
| 255 my $self=shift; | |
| 256 $self->warn("Cannot change an AminoAcidRange object!"); | |
| 257 return (-1); | |
| 258 } | |
| 259 | |
| 260 sub subseq { | |
| 261 my ($self,$pos1,$pos2,$length) = @_; | |
| 262 if (defined ($length)) { | |
| 263 if ($length < 1) { | |
| 264 $self->warn("No sense asking for a subseq of length < 1"); | |
| 265 return (-1); | |
| 266 } | |
| 267 } | |
| 268 unless (defined ($pos1)) { | |
| 269 $pos1=1; | |
| 270 } elsif ($pos1 < 1) { # if position out of boundaries | |
| 271 $self->warn("Starting position for AARange cannot be < 1!"); return (-1); | |
| 272 if ((defined ($pos2))&&($pos1>$pos2)) { | |
| 273 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
| 274 } | |
| 275 } | |
| 276 my $seq=$self->seq; | |
| 277 my $objlength=length($seq); | |
| 278 unless (defined ($length)) { | |
| 279 $length=$objlength-$pos1+1; | |
| 280 } | |
| 281 if (defined ($pos2)) { | |
| 282 if ($pos2 > $objlength) { # if position out of boundaries | |
| 283 $self->warn("Ending position for AARange cannot be > length of AARange!"); return (-1); | |
| 284 } | |
| 285 $length=$pos2-$pos1+1; | |
| 286 if ((defined ($pos1))&&($pos1>$pos2)) { | |
| 287 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
| 288 } | |
| 289 } | |
| 290 my $str=substr($seq,$pos1-1,$length); | |
| 291 if (length($str) < $length) { | |
| 292 $self->warn("Attention, cannot return the length requested for subseq",1); | |
| 293 } | |
| 294 return $str; | |
| 295 } | |
| 296 | |
| 297 sub seq { | |
| 298 my $self=shift; | |
| 299 my ($aa_start,$aa_end)=($self->aa_start,$self->aa_end); | |
| 300 unless (($aa_start)&&($aa_end)) { # they must both exist | |
| 301 $self->warn("Not able to find start or end of the AminoAcid Range"); | |
| 302 return (0); | |
| 303 } | |
| 304 my $translseq=$self->get_Translation->seq; | |
| 305 return substr($translseq,$aa_start-1,$aa_end-$aa_start+1); | |
| 306 # Note: it will return "undef" if the translation stops before the start | |
| 307 # of the aarange (because of upstream nonsense mutation creating STOP). | |
| 308 # For the same reason it would return uncomplete (up to the STOP) string | |
| 309 # if the stop happens in between aarange's start and stop | |
| 310 } | |
| 311 | |
| 312 sub length { | |
| 313 my $self=shift; | |
| 314 my $seq=$self->seq; | |
| 315 my $length=length($seq); | |
| 316 return $length; | |
| 317 } | |
| 318 | |
| 319 sub label { | |
| 320 my ($self,$position)=@_; | |
| 321 my $translation=$self->get_Translation; | |
| 322 my $origstart=$translation->coordinate_start; # preserve it | |
| 323 $translation->coordinate_start($self->start); # change it | |
| 324 my $label=$translation->label($position); | |
| 325 $translation->coordinate_start($origstart); # restore it | |
| 326 return ($label); | |
| 327 } | |
| 328 | |
| 329 sub position { | |
| 330 my ($self,$label)=@_; | |
| 331 my $translation=$self->get_Translation; | |
| 332 my $origstart=$translation->coordinate_start; # preserve it | |
| 333 $translation->coordinate_start($self->start); # change it | |
| 334 my $position=$translation->position($label); | |
| 335 $translation->coordinate_start($origstart); # restore it | |
| 336 return ($position); | |
| 337 } | |
| 338 | |
| 339 =head2 aa_start | |
| 340 | |
| 341 Title : aa_start | |
| 342 Usage : $end = $aarange->aa_start() | |
| 343 Returns : integer (position, according to Translation coordinate system) of | |
| 344 the start of an AminoAcidRange object | |
| 345 Args : none | |
| 346 | |
| 347 =cut | |
| 348 | |
| 349 sub aa_start { | |
| 350 my $self=shift; | |
| 351 my $aastart=$self->get_Translation->position($self->{'start'}); | |
| 352 } | |
| 353 | |
| 354 =head2 aa_end | |
| 355 | |
| 356 Title : aa_end | |
| 357 Usage : $end = $aarange->aa_end() | |
| 358 Returns : integer (position, according to Translation coordinate system) of | |
| 359 the end of an AminoAcidRange object | |
| 360 Args : none | |
| 361 | |
| 362 =cut | |
| 363 | |
| 364 sub aa_end { | |
| 365 my $self=shift; | |
| 366 my $aastart=$self->get_Translation->position($self->{'end'}); | |
| 367 } | |
| 368 | |
| 369 =head2 dna_seq | |
| 370 | |
| 371 Title : dna_seq | |
| 372 Usage : $end = $aarange->dna_seq() | |
| 373 Returns : the sequence at DNA level of the entire AminoAcidRange | |
| 374 this would include introns (if present) | |
| 375 Args : none | |
| 376 | |
| 377 =cut | |
| 378 | |
| 379 sub dna_seq { | |
| 380 my $self=shift; | |
| 381 my $startlabel=$self->start; | |
| 382 my $endtripletlabel=$self->end; | |
| 383 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand); | |
| 384 return ($self->labelsubseq($startlabel,undef,$endlabel)); | |
| 385 } | |
| 386 | |
| 387 =head2 cdna_seq | |
| 388 | |
| 389 Title : cdna_seq | |
| 390 Usage : $end = $aarange->cdna_seq() | |
| 391 Returns : the sequence at cDNA level of the entire AminoAcidRange | |
| 392 i.e. this is the part of the Transcript that codes for the | |
| 393 AminoAcidRange. It would be composed just of exonic DNA. | |
| 394 Args : none | |
| 395 | |
| 396 =cut | |
| 397 | |
| 398 sub cdna_seq { | |
| 399 my $self=shift; | |
| 400 my $startlabel=$self->start; | |
| 401 my $endtripletlabel=$self->end; | |
| 402 my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand); | |
| 403 return ($self->get_Transcript->labelsubseq($startlabel,undef,$endlabel)); | |
| 404 } | |
| 405 | |
| 406 # this checks if the attached Transcript has a Gene object attached | |
| 407 sub gene { | |
| 408 my ($self,$value) = @_; | |
| 409 if (defined $value) { | |
| 410 $self->{'gene'} = $value; | |
| 411 } | |
| 412 unless (exists $self->{'gene'}) { | |
| 413 unless (exists $self->get_Transcript->{'gene'}) { | |
| 414 return (0); | |
| 415 } else { | |
| 416 return ($self->get_Transcript->{'gene'}); | |
| 417 } | |
| 418 } else { | |
| 419 return $self->{'gene'}; | |
| 420 } | |
| 421 } | |
| 422 | |
| 423 1; |
