Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/SeqI.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm | 
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:1f6dce3d34e0 | 
|---|---|
| 1 # $Id: SeqI.pm,v 1.25 2002/10/22 07:38:34 lapp Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::LiveSeq::SeqI | |
| 4 # | |
| 5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
| 6 # | |
| 7 # Copyright Joseph Insana | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 # | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::LiveSeq::SeqI - Abstract sequence interface class for LiveSeq | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 # documentation needed | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This class implements BioPerl PrimarySeqI interface for Live Seq objects. | |
| 24 | |
| 25 One of the main difference in LiveSequence compared to traditional | |
| 26 "string" sequences is that coordinate systems are flexible. Typically | |
| 27 gene nucleotide numbering starts from 1 at the first character of the | |
| 28 initiator codon (A in ATG). This means that negative positions are | |
| 29 possible and common! | |
| 30 | |
| 31 Secondly, the sequence manipulation methods do not return a new | |
| 32 sequence object but change the current object. The current status can | |
| 33 be written out to BioPerl sequence objects. | |
| 34 | |
| 35 =head1 FEEDBACK | |
| 36 | |
| 37 =head2 Mailing Lists | |
| 38 | |
| 39 User feedback is an integral part of the evolution of this and other | |
| 40 Bioperl modules. Send your comments and suggestions preferably to one | |
| 41 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 42 | |
| 43 bioperl-l@bioperl.org - General discussion | |
| 44 http://bio.perl.org/MailList.html - About the mailing lists | |
| 45 | |
| 46 =head2 Reporting Bugs | |
| 47 | |
| 48 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 49 the bugs and their resolution. Bug reports can be submitted via email | |
| 50 or the web: | |
| 51 | |
| 52 bioperl-bugs@bio.perl.org | |
| 53 http://bugzilla.bioperl.org/ | |
| 54 | |
| 55 =head1 AUTHOR - Joseph A.L. Insana | |
| 56 | |
| 57 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
| 58 | |
| 59 Address: | |
| 60 | |
| 61 EMBL Outstation, European Bioinformatics Institute | |
| 62 Wellcome Trust Genome Campus, Hinxton | |
| 63 Cambs. CB10 1SD, United Kingdom | |
| 64 | |
| 65 =head1 APPENDIX | |
| 66 | |
| 67 The rest of the documentation details each of the object | |
| 68 methods. Internal methods are usually preceded with a _ | |
| 69 | |
| 70 Some note on the terminology/notation of method names: | |
| 71 label: a unique pointer to a single nucleotide | |
| 72 position: the position of a nucleotide according to a particular coordinate | |
| 73 system (e.g. counting downstream from a particular label taken as | |
| 74 number 1) | |
| 75 base: the one letter code for a nucleotide (i.e.: "a" "t" "c" "g") | |
| 76 | |
| 77 a base is the "value" that an "element" of a "chain" can assume | |
| 78 (see documentation on the Chain datastructure if interested) | |
| 79 | |
| 80 =cut | |
| 81 | |
| 82 #' | |
| 83 # Let the code begin... | |
| 84 | |
| 85 package Bio::LiveSeq::SeqI; | |
| 86 $VERSION=3.3; | |
| 87 # Version history: | |
| 88 # Thu Mar 16 18:11:18 GMT 2000 v.1.0 Started implementation, interface/inheritance from ChainI.pm | |
| 89 # Thu Mar 16 20:05:51 GMT 2000 v 1.2 implemented up to splice_out | |
| 90 # Fri Mar 17 05:37:37 GMT 2000 v 1.3 implemented lot of new methods and written their documentation / in sync with ChainI 1.6 and Chain 2.4 | |
| 91 # Fri Mar 17 17:17:24 GMT 2000 v 1.7 in sync with ChainI 1.7 | |
| 92 # Fri Mar 17 20:12:27 GMT 2000 v 1.8 NAMING change: index->label everywhere | |
| 93 # Mon Mar 20 19:19:21 GMT 2000 v 2.0 renamed from DNA to SeqI and begun | |
| 94 # working on methods defined with Heikki | |
| 95 # Tue Mar 21 01:37:52 GMT 2000 v 2.1 created strand(), seq() | |
| 96 # Tue Mar 21 02:43:21 GMT 2000 v 2.11 seq() prints correctly also for exons | |
| 97 # Wed Mar 22 19:41:45 GMT 2000 v 2.22 translate, alphabet, length, all_labels | |
| 98 # Thu Mar 23 21:03:42 GMT 2000 v 2.3 follows() label() position() | |
| 99 # Fri Mar 24 18:33:18 GMT 2000 v 2.33 rewritten position(), now works with diverse coordinate_starts | |
| 100 # Sat Mar 25 06:11:55 GMT 2000 v 2.4 started subseq | |
| 101 # Mon Mar 27 19:22:32 BST 2000 v 2.45 subseq should be ok but the thing about reverse strand has to be checked!! | |
| 102 # Tue Mar 28 01:53:31 BST 2000 v 2.46 changed strand behaviour in subseq | |
| 103 # Wed Mar 29 00:05:21 BST 2000 v 2.5 change() begun | |
| 104 # Wed Mar 29 02:06:20 BST 2000 v 2.53 _delete _mutate _praeinsert coded | |
| 105 # Wed Mar 29 02:29:01 BST 2000 v 2.531 _mutate changed to make it more general | |
| 106 # Wed Mar 29 03:38:21 BST 2000 v 2.54 tested and corrected change | |
| 107 # Wed Mar 29 16:23:39 BST 2000 v 2.55 change deals with complex now | |
| 108 # Fri Mar 31 18:26:54 BST 2000 v 2.56 translate_string added | |
| 109 # Sat Apr 1 19:02:28 BST 2000 v 2.57 labelchange() created | |
| 110 # Fri Apr 7 03:31:35 BST 2000 v 2.6 labelsubseq() created | |
| 111 # Sat Apr 8 13:01:09 BST 2000 v 2.61 obj_valid() created | |
| 112 # Wed Apr 12 16:23:21 BST 2000 v 2.7 _deletecheck call added in _delete | |
| 113 # Wed Apr 19 16:21:33 BST 2000 v 2.72 name() source() description() added | |
| 114 # Thu Apr 20 14:42:57 BST 2000 v 2.8 added or rewritten much pod documentation | |
| 115 # Thu Apr 27 16:18:55 BST 2000 v 2.82 translate now accounts for ttable info | |
| 116 # Thu Jun 22 20:02:39 BST 2000 v 2.9 valid() from Transcript now moved here, as the general for all objects inheriting from SeqI | |
| 117 # Thu Jun 22 20:17:32 BST 2000 v 2.91 _unsecure_labelsubseq() added | |
| 118 # Sat Jun 24 00:10:31 BST 2000 v 2.92 unsecure is an option of labelsubseq() now | |
| 119 # Thu Jun 29 16:38:45 BST 2000 v 3.0 labelchange() now calls itself again for the DNAobj if the label for the change is not valid for the object requested but valid for the DNAobj | |
| 120 # Tue Jan 30 14:16:22 EST 2001 v 3.1 delete_Obj added, to flush circular references | |
| 121 # Wed Mar 28 15:16:38 BST 2001 v 3.2 functions warn, verbose, throw, stack_trace, stack_trace_dump added | |
| 122 # Wed Apr 4 13:34:29 BST 2001 v 3.3 moved from carp to warn | |
| 123 | |
| 124 use strict; | |
| 125 use vars qw($VERSION @ISA); | |
| 126 use Bio::LiveSeq::ChainI 1.9; # to inherit from it | |
| 127 use Bio::Tools::CodonTable; # for the translate() function | |
| 128 use Bio::PrimarySeqI; | |
| 129 | |
| 130 @ISA=qw(Bio::Root::Root Bio::LiveSeq::ChainI Bio::PrimarySeqI ); # inherit from ChainI | |
| 131 | |
| 132 =head2 seq | |
| 133 | |
| 134 Title : seq | |
| 135 Usage : $string = $obj->seq() | |
| 136 Function: Returns the complete sequence of an object as a string of letters. | |
| 137 Suggested cases are upper case for proteins and lower case for | |
| 138 DNA sequence (IUPAC standard), | |
| 139 Returns : a string | |
| 140 | |
| 141 | |
| 142 =cut | |
| 143 | |
| 144 sub seq { | |
| 145 my $self = shift; | |
| 146 my ($start,$end) = ($self->start(),$self->end()); | |
| 147 if ($self->strand() == 1) { | |
| 148 return $self->{'seq'}->down_chain2string($start,undef,$end); | |
| 149 } else { # reverse strand | |
| 150 my $str = $self->{'seq'}->up_chain2string($start,undef,$end); | |
| 151 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; | |
| 152 return $str; | |
| 153 } | |
| 154 } | |
| 155 | |
| 156 =head2 all_labels | |
| 157 | |
| 158 Title : all_labels | |
| 159 Usage : @labels = $obj->all_labels() | |
| 160 Function: all the labels of every nucleotide an object is composed of | |
| 161 Returns : an array of labels | |
| 162 Args : none | |
| 163 | |
| 164 =cut | |
| 165 | |
| 166 sub all_labels { | |
| 167 my $self = shift; | |
| 168 my ($start,$end) = ($self->start(),$self->end()); | |
| 169 my $labels; | |
| 170 if ($self->strand() == 1) { | |
| 171 $labels=$self->{'seq'}->down_labels($start,$end); | |
| 172 } else { | |
| 173 $labels=$self->{'seq'}->up_labels($start,$end); | |
| 174 } | |
| 175 return (@{$labels}); | |
| 176 } | |
| 177 | |
| 178 =head2 labelsubseq | |
| 179 | |
| 180 Title : labelsubseq | |
| 181 Usage : $dna->labelsubseq(); | |
| 182 : $dna->labelsubseq($startlabel); | |
| 183 : $dna->labelsubseq($startlabel,$length); | |
| 184 : $dna->labelsubseq($startlabel,undef,$endlabel); | |
| 185 e.g. : $dna->labelsubseq(4,undef,8); | |
| 186 Function: prints the sequence as string. The difference between labelsubseq | |
| 187 and normal subseq is that it uses /labels/ as arguments, instead | |
| 188 than positions. This allows for faster and more efficient lookup, | |
| 189 skipping the (usually) lengthy conversion of positions into labels. | |
| 190 This is expecially useful for manipulating with high power | |
| 191 LiveSeq objects, knowing the labels and exploiting their | |
| 192 usefulness. | |
| 193 Returns : a string | |
| 194 Errorcode -1 | |
| 195 Args : without arguments it returns the entire sequence | |
| 196 with a startlabel it returns the sequence downstream that label | |
| 197 if a length is specified, it returns only that number of bases | |
| 198 if an endlabel is specified, it overrides the length argument | |
| 199 and prints instead up to that label (included) | |
| 200 Defaults: $startlabel defaults to the beginning of the entire sequence | |
| 201 $endlabel defaults to the end of the entire sequence | |
| 202 | |
| 203 =cut | |
| 204 | |
| 205 # NOTE: unsecuremode is to be used /ONLY/ if sure of the start and end labels, expecially that they follow each other in the correct order!!!! | |
| 206 | |
| 207 sub labelsubseq { | |
| 208 my ($self,$start,$length,$end,$unsecuremode) = @_; | |
| 209 if (defined $unsecuremode && $unsecuremode eq "unsecuremoderequested") | |
| 210 { # to skip security checks (faster) | |
| 211 unless ($start) { | |
| 212 $start=$self->start; | |
| 213 } | |
| 214 if ($end) { | |
| 215 if ($end == $start) { | |
| 216 $length=1; | |
| 217 undef $end; | |
| 218 } else { | |
| 219 undef $length; | |
| 220 } | |
| 221 } else { | |
| 222 unless ($length) { | |
| 223 $end=$self->end; | |
| 224 } | |
| 225 } | |
| 226 } else { | |
| 227 if ($start) { | |
| 228 unless ($self->{'seq'}->valid($start)) { | |
| 229 $self->warn("Start label not valid"); return (-1); | |
| 230 } | |
| 231 } | |
| 232 if ($end) { | |
| 233 if ($end == $start) { | |
| 234 $length=1; | |
| 235 undef $end; | |
| 236 } else { | |
| 237 unless ($self->{'seq'}->valid($end)) { | |
| 238 $self->warn("End label not valid"); return (-1); | |
| 239 } | |
| 240 unless ($self->follows($start,$end) == 1) { | |
| 241 $self->warn("End label does not follow Start label!"); return (-1); | |
| 242 } | |
| 243 undef $length; | |
| 244 } | |
| 245 } | |
| 246 } | |
| 247 if ($self->strand() == 1) { | |
| 248 return $self->{'seq'}->down_chain2string($start,$length,$end); | |
| 249 } else { # reverse strand | |
| 250 my $str = $self->{'seq'}->up_chain2string($start,$length,$end); | |
| 251 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; | |
| 252 return $str; | |
| 253 } | |
| 254 } | |
| 255 | |
| 256 =head2 subseq | |
| 257 | |
| 258 Title : subseq | |
| 259 Usage : $substring = $obj->subseq(10,40); | |
| 260 : $substring = $obj->subseq(10,undef,4); | |
| 261 Function: returns the subseq from start to end, where the first base | |
| 262 is 1 and the number is inclusive, ie 1-2 are the first two | |
| 263 bases of the sequence | |
| 264 | |
| 265 Start cannot be larger than end but can be equal. | |
| 266 | |
| 267 Allows for negative numbers $obj->subseq(-10,-1). By | |
| 268 definition, there is no 0! | |
| 269 -5 -1 1 5 | |
| 270 gctagcgcccaac atggctcgctg | |
| 271 | |
| 272 This allows to retrieve sequences upstream from given position. | |
| 273 | |
| 274 The precedence is from left to right: if END is given LENGTH is | |
| 275 ignored. | |
| 276 | |
| 277 Examples: $obj->subseq(-10,undef,10) returns 10 elements before position 1 | |
| 278 $obj->subseq(4,8) returns elements from the 4th to the 8th, inclusive | |
| 279 | |
| 280 Returns : a string | |
| 281 Errorcode: -1 | |
| 282 Args : start, integer, defaults to start of the sequence | |
| 283 end, integer, '' or undef, defaults to end of the sequence | |
| 284 length, integer, '' or undef | |
| 285 an optional strand (1 or -1) 4th argument | |
| 286 if strand argument is not given, it will default to the object | |
| 287 argment. This argument is useful when a call is issued from a child | |
| 288 of a parent object containing the subseq method | |
| 289 | |
| 290 =cut | |
| 291 | |
| 292 #' | |
| 293 # check the fact about reverse strand! | |
| 294 # is it feasible? Is it correct? Should we do it? How about exons? Does it | |
| 295 # work when you ask subseq of an exon? | |
| 296 # eliminated now (Mon night) | |
| 297 sub subseq { | |
| 298 ##my ($self,$pos1,$pos2,$length,$strand) = @_; | |
| 299 my ($self,$pos1,$pos2,$length,$strand) = @_; | |
| 300 ##unless (defined ($strand)) { # if optional [strand] argument not given | |
| 301 ## $strand=$self->strand; | |
| 302 ##} | |
| 303 $strand=$self->strand; | |
| 304 my ($str,$startlabel,$endlabel); | |
| 305 if (defined ($length)) { | |
| 306 if ($length < 1) { | |
| 307 $self->warn("No sense asking for a subseq of length < 1"); | |
| 308 return (-1); | |
| 309 } | |
| 310 } | |
| 311 unless (defined ($pos1)) { | |
| 312 #print "\n##### DEBUG pos1 not defined\n"; | |
| 313 $startlabel=$self->start; | |
| 314 } else { | |
| 315 if ($pos1 == 0) { # if position = 0 complain | |
| 316 $self->warn("Position cannot be 0!"); return (-1); | |
| 317 } | |
| 318 ##if ($strand == 1) { # CHECK THIS! | |
| 319 if ((defined ($pos2))&&($pos1>$pos2)) { | |
| 320 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
| 321 } | |
| 322 ##} else { # CHECK THIS! | |
| 323 ## if ((defined ($pos2))&&($pos1<$pos2)) { | |
| 324 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!)"; return (-1); | |
| 325 ## } | |
| 326 ##} | |
| 327 $startlabel=$self->label($pos1); | |
| 328 if ($startlabel < 1) { | |
| 329 $self->warn("position $pos1 not valid as start of subseq!"); return (-1); | |
| 330 } | |
| 331 } | |
| 332 unless (defined ($pos2)) { | |
| 333 #print "\n##### pos2 not defined\n"; | |
| 334 unless (defined ($length)) { | |
| 335 $endlabel=$self->end; | |
| 336 } | |
| 337 } else { | |
| 338 if ($pos2 == 0) { # if position = 0 complain | |
| 339 $self->warn("Position cannot be 0!"); return (-1); | |
| 340 } | |
| 341 undef $length; | |
| 342 ##if ($strand == 1) { # CHECK THIS! | |
| 343 if ((defined ($pos1))&&($pos1>$pos2)) { | |
| 344 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
| 345 } | |
| 346 ##} else { # CHECK THIS! | |
| 347 ## if ((defined ($pos1))&&($pos1<$pos2)) { | |
| 348 ## $self->warn("1st position($pos1) cannot be < 2nd position($pos2) on reverse strand!"); return (-1); | |
| 349 ## } | |
| 350 ##} | |
| 351 $endlabel=$self->label($pos2); | |
| 352 if ($endlabel < 1) { | |
| 353 $self->warn("position $pos2 not valid as end of subseq!"); return (-1); | |
| 354 } | |
| 355 } | |
| 356 #print "\n ####DEBUG: start $startlabel end $endlabel length $length strand $strand\n"; | |
| 357 | |
| 358 if ($strand == 1) { | |
| 359 $str = $self->{'seq'}->down_chain2string($startlabel,$length,$endlabel); | |
| 360 } else { # reverse strand | |
| 361 $str = $self->{'seq'}->up_chain2string($startlabel,$length,$endlabel); | |
| 362 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; | |
| 363 } | |
| 364 return $str; | |
| 365 } | |
| 366 | |
| 367 =head2 length | |
| 368 | |
| 369 Title : length | |
| 370 Usage : $seq->length(); | |
| 371 Function: returns the number of nucleotides (or the number of aminoacids) | |
| 372 in the entire sequence | |
| 373 Returns : an integer | |
| 374 Errorcode -1 | |
| 375 Args : none | |
| 376 | |
| 377 =cut | |
| 378 | |
| 379 sub length { | |
| 380 my $self=shift; | |
| 381 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand()); | |
| 382 if ($strand == 1) { | |
| 383 return $self->{'seq'}->down_subchain_length($start,$end); | |
| 384 } else { | |
| 385 return $self->{'seq'}->up_subchain_length($start,$end); | |
| 386 } | |
| 387 } | |
| 388 | |
| 389 =head2 display_id | |
| 390 | |
| 391 Title : display_id | |
| 392 Usage : $id_string = $obj->display_id(); | |
| 393 Function: returns the display id, alias the common name of the object | |
| 394 | |
| 395 The semantics of this is that it is the most likely string | |
| 396 to be used as an identifier of the sequence, and likely to | |
| 397 have "human" readability. The id is equivalent to the ID | |
| 398 field of the GenBank/EMBL databanks and the id field of the | |
| 399 Swissprot/sptrembl database. In fasta format, the >(\S+) is | |
| 400 presumed to be the id, though some people overload the id | |
| 401 to embed other information. | |
| 402 | |
| 403 See also: accession_number | |
| 404 Returns : a string | |
| 405 Args : none | |
| 406 | |
| 407 =cut | |
| 408 | |
| 409 sub display_id { | |
| 410 my ($self,$value) = @_; | |
| 411 if(defined $value) { | |
| 412 $self->{'display_id'} = $value; | |
| 413 } | |
| 414 return $self->{'display_id'}; | |
| 415 } | |
| 416 | |
| 417 | |
| 418 =head2 accession_number | |
| 419 | |
| 420 Title : accession_number | |
| 421 Usage : $unique_biological_key = $obj->accession_number; | |
| 422 Function: Returns the unique biological id for a sequence, commonly | |
| 423 called the accession_number. | |
| 424 Notice that primary_id() provides the unique id for the | |
| 425 implemetation, allowing multiple objects to have the same accession | |
| 426 number in a particular implementation. | |
| 427 | |
| 428 For objects with no accession_number this method returns "unknown". | |
| 429 Returns : a string | |
| 430 Args : none | |
| 431 | |
| 432 =cut | |
| 433 | |
| 434 sub accession_number { | |
| 435 my ($self,$value) = @_; | |
| 436 if (defined $value) { | |
| 437 $self->{'accession_number'} = $value; | |
| 438 } | |
| 439 unless (exists $self->{'accession_number'}) { | |
| 440 return "unknown"; | |
| 441 } else { | |
| 442 return $self->{'accession_number'}; | |
| 443 } | |
| 444 } | |
| 445 | |
| 446 =head2 primary_id | |
| 447 | |
| 448 Title : primary_id | |
| 449 Usage : $unique_implementation_key = $obj->primary_id; | |
| 450 Function: Returns the unique id for this object in this | |
| 451 implementation. This allows implementations to manage their own | |
| 452 object ids in a way the implementation can control. Clients can | |
| 453 expect one id to map to one object. | |
| 454 | |
| 455 For sequences with no primary_id, this method returns | |
| 456 a stringified memory location. | |
| 457 | |
| 458 Returns : A string | |
| 459 Args : None | |
| 460 | |
| 461 =cut | |
| 462 | |
| 463 | |
| 464 sub primary_id { | |
| 465 my ($self,$value) = @_; | |
| 466 if(defined $value) { | |
| 467 $self->{'primary_id'} = $value; | |
| 468 } | |
| 469 unless (exists $self->{'primary_id'}) { | |
| 470 return "$self"; | |
| 471 } else { | |
| 472 return $self->{'primary_id'}; | |
| 473 } | |
| 474 } | |
| 475 | |
| 476 =head2 change | |
| 477 | |
| 478 Title : change | |
| 479 Usage : $substring = $obj->change('AA', 10); | |
| 480 Function: changes, modifies, mutates the LiveSequence | |
| 481 Examples: | |
| 482 $obj->change('', 10); delete nucleotide #10 | |
| 483 $obj->change('', 10, 2); delete two nucleotides starting from #10 | |
| 484 $obj->change('G', 10); change nuc #10 to 'G' | |
| 485 $obj->change('GA', 10, 4); replace #10 and 3 following with 'GA' | |
| 486 $obj->change('GA', 10, 2)); is same as $obj->change('GA', 10); | |
| 487 $obj->change('GA', 10, 0 ); insert 'GA' before nucleotide at #10 | |
| 488 $obj->change('GA', 10, 1); GA inserted before #10, #10 deleted | |
| 489 $obj->change('GATC', 10, 2); GATC inserted before #10, #10 deleted | |
| 490 $obj->change('GATC', 10, 6); GATC inserted before #10, #10-#15 deleted | |
| 491 | |
| 492 | |
| 493 Returns : a string of deleted bases (if any) or 1 (everything OK) | |
| 494 Errorcode: -1 | |
| 495 Args : seq, string, or '' ('' = undef = 0 = deletion) | |
| 496 start, integer | |
| 497 length, integer (optional) | |
| 498 | |
| 499 =cut | |
| 500 | |
| 501 sub change { | |
| 502 &positionchange; | |
| 503 } | |
| 504 | |
| 505 =head2 positionchange | |
| 506 | |
| 507 Title : positionchange | |
| 508 Function: Exactly like change. I.e. change() defaults to positionchange() | |
| 509 | |
| 510 =cut | |
| 511 | |
| 512 sub positionchange { | |
| 513 my ($self,$newseq,$position,$length)=@_; | |
| 514 unless ($position) { | |
| 515 $self->warn("Position not given or position 0"); | |
| 516 return (-1); | |
| 517 } | |
| 518 my $label=$self->label($position); | |
| 519 unless ($label > 0) { # label not found or error | |
| 520 $self->warn("No valid label found at that position!"); | |
| 521 return (-1); | |
| 522 } | |
| 523 return ($self->labelchange($newseq,$label,$length)); | |
| 524 } | |
| 525 | |
| 526 =head2 labelchange | |
| 527 | |
| 528 Title : labelchange | |
| 529 Function: Exactly like change but uses a /label/ instead than a position | |
| 530 as second argument. This allows for multiple changes in a LiveSeq | |
| 531 without the burden of recomputing positions. I.e. for a multiple | |
| 532 change in two different points of the LiveSeq, the approach would | |
| 533 be the following: fetch the correct labels out of the two different | |
| 534 positions (method: label($position)) and then use the labelchange() | |
| 535 method to modify the sequence using those labels instead than | |
| 536 relying on the positions (that would have modified after the | |
| 537 first change). | |
| 538 | |
| 539 =cut | |
| 540 | |
| 541 sub labelchange { | |
| 542 my ($self,$newseq,$label,$length)=@_; | |
| 543 unless ($self->valid($label)) { | |
| 544 if ($self->{'seq'}->valid($label)) { | |
| 545 #$self->warn("Label \'$label\' not valid for executing a LiveSeq change for the object asked but it's ok for DNAlevel change, reverting to that"); | |
| 546 shift @_; | |
| 547 return($self->{'seq'}->labelchange(@_)); | |
| 548 } else { | |
| 549 $self->warn("Label \'$label\' not valid for executing a LiveSeq change"); | |
| 550 return (-1); | |
| 551 } | |
| 552 } | |
| 553 unless ($newseq) { # it means this is a simple deletion | |
| 554 if (defined($length)) { | |
| 555 unless ($length >= 0) { | |
| 556 $self->warn("No sense having length < 0 in a deletion"); | |
| 557 return (-1); | |
| 558 } | |
| 559 } else { | |
| 560 $self->warn("Length not defined for deletion!"); | |
| 561 return (-1); | |
| 562 } | |
| 563 return $self->_delete($label,$length); | |
| 564 } | |
| 565 my $newseqlength=CORE::length($newseq); | |
| 566 if (defined($length)) { | |
| 567 unless ($length >= 0) { | |
| 568 $self->warn("No sense having length < 0 in a change()"); | |
| 569 return (-1); | |
| 570 } | |
| 571 } else { | |
| 572 $length=$newseqlength; # defaults to pointmutation(s) | |
| 573 } | |
| 574 if ($length == 0) { # it means this is a simple insertion, length def&==0 | |
| 575 my ($insertbegin,$insertend)=$self->_praeinsert($label,$newseq); | |
| 576 if ($insertbegin == -1) { | |
| 577 return (-1); | |
| 578 } else { | |
| 579 return (1); | |
| 580 } | |
| 581 } | |
| 582 if ($newseqlength == $length) { # it means this is simple pointmutation(s) | |
| 583 return $self->_mutate($label,$newseq,$length); | |
| 584 } | |
| 585 # if we arrived here then change is complex mixture | |
| 586 my $strand=$self->strand(); | |
| 587 my $afterendlabel=$self->label($length+1,$label,$strand); # get the label at $length+1 positions after $label | |
| 588 unless ($afterendlabel > 0) { # label not found or error | |
| 589 $self->warn("No valid afterendlabel found for executing the complex mutation!"); | |
| 590 return (-1); | |
| 591 } | |
| 592 my $deleted=$self->_delete($label,$length); # first delete length nucs | |
| 593 if ($deleted == -1) { # if errors | |
| 594 return (-1); | |
| 595 } else { # then insert the newsequence | |
| 596 my ($insertbegin,$insertend)=$self->_praeinsert($afterendlabel,$newseq); | |
| 597 if ($insertbegin == -1) { | |
| 598 return (-1); | |
| 599 } else { | |
| 600 return (1); | |
| 601 } | |
| 602 } | |
| 603 } | |
| 604 | |
| 605 # internal methods for change() | |
| 606 | |
| 607 # arguments: label for beginning of deletion, new sequence to insert | |
| 608 # returns: labels of beginning and end of the inserted sequence | |
| 609 # errorcode: -1 | |
| 610 sub _praeinsert { | |
| 611 my ($self,$label,$newseq)=@_; | |
| 612 my ($insertbegin,$insertend); | |
| 613 my $strand=$self->strand(); | |
| 614 if ($strand == 1) { | |
| 615 ($insertbegin,$insertend)=($self->{'seq'}->praeinsert_string($newseq,$label)); | |
| 616 } else { # since it's reverse strand and we insert in forward direction.... | |
| 617 $newseq=reverse($newseq); | |
| 618 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases | |
| 619 ($insertend,$insertbegin)=($self->{'seq'}->postinsert_string($newseq,$label)); | |
| 620 } | |
| 621 if (($insertbegin==0)||($insertend==0)) { | |
| 622 $self->warn("Some error occurred while inserting!"); | |
| 623 return (-1); | |
| 624 } else { | |
| 625 return ($insertbegin,$insertend); | |
| 626 } | |
| 627 } | |
| 628 | |
| 629 # arguments: label for beginning of deletion, length of deletion | |
| 630 # returns: string of deleted bases | |
| 631 # errorcode: -1 | |
| 632 sub _delete { | |
| 633 my ($self,$label,$length)=@_; | |
| 634 my $strand=$self->strand(); | |
| 635 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label | |
| 636 unless ($endlabel > 0) { # label not found or error | |
| 637 $self->warn("No valid endlabel found for executing the deletion!"); | |
| 638 return (-1); | |
| 639 } | |
| 640 # this is important in Transcript to fix exon structure | |
| 641 $self->_deletecheck($label,$endlabel); | |
| 642 my $deletedseq; | |
| 643 if ($strand == 1) { | |
| 644 $deletedseq=$self->{'seq'}->splice_chain($label,undef,$endlabel); | |
| 645 } else { | |
| 646 $deletedseq=$self->{'seq'}->splice_chain($endlabel,undef,$label); | |
| 647 $deletedseq=reverse($deletedseq); # because we are on reverse strand and we cut anyway | |
| 648 # in forward direction | |
| 649 $deletedseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases | |
| 650 } | |
| 651 return ($deletedseq); | |
| 652 } | |
| 653 | |
| 654 # empty function, overridden in Transcript, not useful here | |
| 655 sub _deletecheck { | |
| 656 } | |
| 657 | |
| 658 # arguments: label for beginning of mutation, newsequence, number of mutations | |
| 659 # returns: 1 all OK | |
| 660 # errorcode: -1 | |
| 661 sub _mutate { | |
| 662 my ($self,$label,$newseq,$length)=@_; # length is equal to length(newseq) | |
| 663 my ($i,$base,$nextlabel); | |
| 664 my @labels; # array of labels | |
| 665 my $strand=$self->strand(); | |
| 666 if ($length == 1) { # special cases first | |
| 667 @labels=($label); | |
| 668 } else { | |
| 669 my $endlabel=$self->label($length,$label,$strand); # get the label at $length positions after $label | |
| 670 unless ($endlabel > 0) { # label not found or error | |
| 671 $self->warn("No valid endlabel found for executing the mutation!"); | |
| 672 return (-1); | |
| 673 } | |
| 674 if ($length == 2) { # another special case | |
| 675 @labels=($label,$endlabel); | |
| 676 } else { # more than 3 bases changed | |
| 677 # this wouldn't work for Transcript | |
| 678 #my $labelsarrayref; | |
| 679 #if ($strand == 1) { | |
| 680 #$labelsarrayref=$self->{'seq'}->down_labels($label,$endlabel); | |
| 681 #} else { | |
| 682 #$labelsarrayref=$self->{'seq'}->up_labels($label,$endlabel); | |
| 683 #} | |
| 684 #@labels=@{$labelsarrayref}; | |
| 685 #if ($length != scalar(@labels)) { # not enough labels returned | |
| 686 #$self->warn("Not enough valid labels found for executing the mutation!"); | |
| 687 #return (-1); | |
| 688 #} | |
| 689 | |
| 690 # this should be more general | |
| 691 @labels=($label); # put the first one | |
| 692 while ($label != $endlabel) { | |
| 693 $nextlabel=$self->label(2,$label,$strand); # retrieve the next label | |
| 694 push (@labels,$nextlabel); | |
| 695 $label=$nextlabel; # move on reference | |
| 696 } | |
| 697 } | |
| 698 } | |
| 699 if ($strand == -1) { # only for reverse strand | |
| 700 $newseq =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; # since it's reverse strand we get the complementary bases | |
| 701 } | |
| 702 my $errorcheck; # if not equal to $length after summing for all changes, error did occurr | |
| 703 $i = 0; | |
| 704 foreach $base (split(//,$newseq)) { | |
| 705 $errorcheck += $self->{'seq'}->set_value_at_label($base,$labels[$i]); | |
| 706 $i++; | |
| 707 } | |
| 708 if ($errorcheck != $length) { | |
| 709 $self->warn("Some error occurred while mutating!"); | |
| 710 return (-1); | |
| 711 } else { | |
| 712 return (1); | |
| 713 } | |
| 714 } | |
| 715 | |
| 716 =head2 valid | |
| 717 | |
| 718 Title : valid | |
| 719 Usage : $boolean = $obj->valid($label) | |
| 720 Function: tests if a label exists inside the object | |
| 721 Returns : boolean | |
| 722 Args : label | |
| 723 | |
| 724 =cut | |
| 725 | |
| 726 # argument: label | |
| 727 # returns: 1 YES 0 NO | |
| 728 sub valid { | |
| 729 my ($self,$label)=@_; | |
| 730 my $checkme; | |
| 731 my @labels=$self->all_labels; | |
| 732 foreach $checkme (@labels) { | |
| 733 if ($label == $checkme) { | |
| 734 return (1); # found | |
| 735 } | |
| 736 } | |
| 737 return (0); # not found | |
| 738 } | |
| 739 | |
| 740 | |
| 741 =head2 start | |
| 742 | |
| 743 Title : start | |
| 744 Usage : $startlabel=$obj->start() | |
| 745 Function: returns the label of the first nucleotide of the object (exon, CDS) | |
| 746 Returns : label | |
| 747 Args : none | |
| 748 | |
| 749 =cut | |
| 750 | |
| 751 sub start { | |
| 752 my ($self) = @_; | |
| 753 return $self->{'start'}; # common for all classes BUT DNA (which redefines it) and Transcript (that takes the information from the Exons) | |
| 754 } | |
| 755 | |
| 756 =head2 end | |
| 757 | |
| 758 Title : end | |
| 759 Usage : $endlabel=$obj->end() | |
| 760 Function: returns the label of the last nucleotide of the object (exon, CDS) | |
| 761 Returns : label | |
| 762 Args : none | |
| 763 | |
| 764 =cut | |
| 765 | |
| 766 sub end { | |
| 767 my ($self) = @_; | |
| 768 return $self->{'end'}; | |
| 769 } | |
| 770 | |
| 771 =head2 strand | |
| 772 | |
| 773 Title : strand | |
| 774 Usage : $strand=$obj->strand() | |
| 775 $obj->strand($strand) | |
| 776 Function: gets or sets strand information, being 1 or -1 (forward or reverse) | |
| 777 Returns : -1 or 1 | |
| 778 Args : none OR -1 or 1 | |
| 779 | |
| 780 =cut | |
| 781 | |
| 782 sub strand { | |
| 783 my ($self,$strand) = @_; | |
| 784 if ($strand) { | |
| 785 if (($strand != 1)&&($strand != -1)) { | |
| 786 $self->warn("strand information not changed because strand identifier not valid"); | |
| 787 } else { | |
| 788 $self->{'strand'} = $strand; | |
| 789 } | |
| 790 } | |
| 791 return $self->{'strand'}; | |
| 792 } | |
| 793 | |
| 794 =head2 alphabet | |
| 795 | |
| 796 Title : alphabet | |
| 797 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ } | |
| 798 Function: Returns the type of sequence being one of | |
| 799 'dna', 'rna' or 'protein'. This is case sensitive. | |
| 800 | |
| 801 Returns : a string either 'dna','rna','protein'. | |
| 802 Args : none | |
| 803 Note : "circular dna" is set as dna | |
| 804 | |
| 805 =cut | |
| 806 | |
| 807 | |
| 808 sub alphabet { | |
| 809 my %valid_type = map {$_, 1} qw( dna rna protein ); | |
| 810 my ($self,$value) = @_; | |
| 811 if (defined $value) { | |
| 812 $value =~ s/circular dna/dna/; | |
| 813 unless ( $valid_type{$value} ) { | |
| 814 $self->warn("Molecular type '$value' is not a valid type"); | |
| 815 } | |
| 816 $self->{'alphabet'} = $value; | |
| 817 } | |
| 818 return $self->{'alphabet'}; | |
| 819 } | |
| 820 | |
| 821 =head2 coordinate_start | |
| 822 | |
| 823 Title : coordinate_start | |
| 824 Usage : $coordstartlabel=$obj->coordinate_start() | |
| 825 : $coordstartlabel=$obj->coordinate_start($label) | |
| 826 Function: returns and optionally sets the first label of the coordinate | |
| 827 system used | |
| 828 For some objects only labels inside the object or in frame (for | |
| 829 Translation objects) will be allowed to get set as coordinate start | |
| 830 | |
| 831 Returns : label. It returns 0 if label not found. | |
| 832 Errorcode -1 | |
| 833 Args : an optional reference $label that is position 1 | |
| 834 | |
| 835 =cut | |
| 836 | |
| 837 | |
| 838 sub coordinate_start { | |
| 839 my ($self,$label) = @_; | |
| 840 if ($label) { | |
| 841 if ($self->valid($label)) { | |
| 842 $self->{'coordinate_start'} = $label; | |
| 843 } else { | |
| 844 $self->warn("The label you are trying to set as coordinate_start is not valid for this object"); | |
| 845 } | |
| 846 } | |
| 847 my $coord_start = $self->{'coordinate_start'}; | |
| 848 if ($coord_start) { | |
| 849 return $coord_start; | |
| 850 } else { | |
| 851 return $self->start(); | |
| 852 } | |
| 853 } | |
| 854 | |
| 855 =head2 label | |
| 856 | |
| 857 Title : label | |
| 858 Usage : $seq->label($position) | |
| 859 : $seq->label($position,$firstlabel) | |
| 860 Examples: $nextlabel=$seq->label(2,$label) -> retrieves the following label | |
| 861 : $prevlabel=$seq->label(-1,$label) -> retrieves the preceding label | |
| 862 | |
| 863 Function: returns the label of the nucleotide at $position from current | |
| 864 coordinate start | |
| 865 Returns : a label. It returns 0 if label not found. | |
| 866 Errorcode -1 | |
| 867 Args : a position, | |
| 868 an optional reference $firstlabel that is to be used as position 1 | |
| 869 an optional strand (1 or -1) argument | |
| 870 if strand argument is not given, it will default to the object | |
| 871 argument. This argument is useful when a call is issued from a child | |
| 872 of a parent object containing the subseq method | |
| 873 | |
| 874 =cut | |
| 875 | |
| 876 | |
| 877 sub label { | |
| 878 my ($self,$position,$firstlabel,$strand)=@_; | |
| 879 my $label; | |
| 880 unless (defined ($firstlabel)) { | |
| 881 $firstlabel=$self->coordinate_start; | |
| 882 } | |
| 883 unless ($position) { # if position = 0 complain ? | |
| 884 $self->warn("Position not given or position 0"); | |
| 885 return (-1); | |
| 886 } | |
| 887 unless (defined ($strand)) { # if optional [strand] argument not given | |
| 888 $strand=$self->strand; | |
| 889 } | |
| 890 if ($strand == 1) { | |
| 891 if ($position > 0) { | |
| 892 $label=$self->{'seq'}->down_get_label_at_pos($position,$firstlabel) | |
| 893 } else { # if < 0 | |
| 894 $label=$self->{'seq'}->up_get_label_at_pos(1 - $position,$firstlabel) | |
| 895 } | |
| 896 } else { | |
| 897 if ($position > 0) { | |
| 898 $label=$self->{'seq'}->up_get_label_at_pos($position,$firstlabel) | |
| 899 } else { # if < 0 | |
| 900 $label=$self->{'seq'}->down_get_label_at_pos(1 - $position,$firstlabel) | |
| 901 } | |
| 902 } | |
| 903 return $label; | |
| 904 } | |
| 905 | |
| 906 | |
| 907 =head2 position | |
| 908 | |
| 909 Title : position | |
| 910 Usage : $seq->position($label) | |
| 911 : $seq->position($label,$firstlabel) | |
| 912 Function: returns the position of nucleotide at $label | |
| 913 Returns : the position of the label from current coordinate start | |
| 914 Errorcode 0 | |
| 915 Args : a label pointing to a certain nucleotide (e.g. start of exon) | |
| 916 an optional "firstlabel" as reference to count from | |
| 917 an optional strand (1 or -1) argument | |
| 918 if strand argument is not given, it will default to the object | |
| 919 argument. This argument is useful when a call is issued from a child | |
| 920 of a parent object containing the subseq method | |
| 921 | |
| 922 =cut | |
| 923 | |
| 924 | |
| 925 sub position { | |
| 926 my ($self,$label,$firstlabel,$strand)=@_; | |
| 927 unless (defined ($strand)) { # if optional [strand] argument not given | |
| 928 $strand=$self->strand; | |
| 929 } | |
| 930 unless (defined ($firstlabel)) { | |
| 931 $firstlabel=$self->coordinate_start; | |
| 932 } | |
| 933 unless ($self->valid($label)) { | |
| 934 $self->warn("label not valid"); | |
| 935 return (0); | |
| 936 } | |
| 937 if ($firstlabel == $label) { | |
| 938 return (1); | |
| 939 } | |
| 940 my ($coordpos,$position0,$position); | |
| 941 $position0=$self->{'seq'}->down_get_pos_of_label($label); | |
| 942 $coordpos=$self->{'seq'}->down_get_pos_of_label($firstlabel); | |
| 943 $position=$position0-$coordpos+1; | |
| 944 if ($position <= 0) { | |
| 945 $position--; | |
| 946 } | |
| 947 if ($strand == -1) { | |
| 948 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",1-$position; | |
| 949 return (1-$position); | |
| 950 } else { | |
| 951 #print "\n----------DEBUGSEQPOS label $label firstlabel $firstlabel strand $strand: position=",$position; | |
| 952 return ($position); | |
| 953 } | |
| 954 } | |
| 955 | |
| 956 =head2 follows | |
| 957 | |
| 958 Title : follows | |
| 959 Usage : $seq->follows($firstlabel,$secondlabel) | |
| 960 : $seq->follows($firstlabel,$secondlabel,$strand) | |
| 961 Function: checks if SECONDlabel follows FIRSTlabel, undependent of the strand | |
| 962 i.e. it checks downstream for forward strand and | |
| 963 upstream for reverse strand | |
| 964 Returns : 1 or 0 | |
| 965 Errorcode -1 | |
| 966 Args : two labels | |
| 967 an optional strand (1 or -1) argument | |
| 968 if strand argument is not given, it will default to the object | |
| 969 argument. This argument is useful when a call is issued from a child | |
| 970 of a parent object containing the subseq method | |
| 971 | |
| 972 =cut | |
| 973 | |
| 974 #' | |
| 975 # wraparound to is_downstream and is_upstream that chooses the correct one | |
| 976 # depending on the strand | |
| 977 sub follows { | |
| 978 my ($self,$firstlabel,$secondlabel,$strand)=@_; | |
| 979 unless (defined ($strand)) { # if optional [strand] argument not given | |
| 980 $strand=$self->strand; | |
| 981 } | |
| 982 if ($strand == 1) { | |
| 983 return ($self->{'seq'}->is_downstream($firstlabel,$secondlabel)); | |
| 984 } else { | |
| 985 return ($self->{'seq'}->is_upstream($firstlabel,$secondlabel)); | |
| 986 } | |
| 987 } | |
| 988 # | |
| 989 #=head2 translate | |
| 990 # | |
| 991 # Title : translate | |
| 992 # Usage : $protein_seq = $obj->translate | |
| 993 # Function: Provides the translation of the DNA sequence | |
| 994 # using full IUPAC ambiguities in DNA/RNA and amino acid codes. | |
| 995 # | |
| 996 # The resulting translation is identical to EMBL/TREMBL database | |
| 997 # translations. | |
| 998 # | |
| 999 # Returns : a string | |
| 1000 # Args : character for terminator (optional) defaults to '*' | |
| 1001 # character for unknown amino acid (optional) defaults to 'X' | |
| 1002 # frame (optional) valid values 0, 1, 3, defaults to 0 | |
| 1003 # codon table id (optional) defaults to 1 | |
| 1004 # | |
| 1005 #=cut | |
| 1006 # | |
| 1007 #sub translate { | |
| 1008 # my ($self) = shift; | |
| 1009 # return ($self->translate_string($self->seq,@_)); | |
| 1010 #} | |
| 1011 # | |
| 1012 #=head2 translate_string | |
| 1013 # | |
| 1014 # Title : translate_string | |
| 1015 # Usage : $protein_seq = $obj->translate_string("attcgtgttgatcgatta"); | |
| 1016 # Function: Like translate, but can be used to translate subsequences after | |
| 1017 # having retrieved them as string. | |
| 1018 # Args : 1st argument is a string. Optional following arguments: like in | |
| 1019 # the translate method | |
| 1020 # | |
| 1021 #=cut | |
| 1022 # | |
| 1023 # | |
| 1024 #sub translate_string { | |
| 1025 # my($self) = shift; | |
| 1026 # my($seq) = shift; | |
| 1027 # my($stop, $unknown, $frame, $tableid) = @_; | |
| 1028 # my($i, $len, $output) = (0,0,''); | |
| 1029 # my($codon) = ""; | |
| 1030 # my $aa; | |
| 1031 # | |
| 1032 # | |
| 1033 # ## User can pass in symbol for stop and unknown codons | |
| 1034 # unless(defined($stop) and $stop ne '') { $stop = "*"; } | |
| 1035 # unless(defined($unknown) and $unknown ne '') { $unknown = "X"; } | |
| 1036 # unless(defined($frame) and $frame ne '') { $frame = 0; } | |
| 1037 # | |
| 1038 # ## the codon table ID | |
| 1039 # if ($self->translation_table) { | |
| 1040 # $tableid = $self->translation_table; | |
| 1041 # } | |
| 1042 # unless(defined($tableid) and $tableid ne '') { $tableid = 1; } | |
| 1043 # | |
| 1044 # ##Error if monomer is "Amino" | |
| 1045 # $self->warn("Can't translate an amino acid sequence.") | |
| 1046 # if (defined $self->alphabet && $self->alphabet eq 'protein'); | |
| 1047 # | |
| 1048 # ##Error if frame is not 0, 1 or 2 | |
| 1049 # $self->warn("Valid values for frame are 0, 1, 2, not [$frame].") | |
| 1050 # unless ($frame == 0 or $frame == 1 or $frame == 2); | |
| 1051 # | |
| 1052 # #thows a warning if ID is invalid | |
| 1053 # my $codonTable = Bio::Tools::CodonTable->new( -id => $tableid); | |
| 1054 # | |
| 1055 # # deal with frame offset. | |
| 1056 # if( $frame ) { | |
| 1057 # $seq = substr ($seq,$frame); | |
| 1058 # } | |
| 1059 # | |
| 1060 # for $codon ( grep { CORE::length == 3 } split(/(.{3})/, $seq) ) { | |
| 1061 # my $aa = $codonTable->translate($codon); | |
| 1062 # if ($aa eq '*') { | |
| 1063 # $output .= $stop; | |
| 1064 # } | |
| 1065 # elsif ($aa eq 'X') { | |
| 1066 # $output .= $unknown; | |
| 1067 # } | |
| 1068 # else { | |
| 1069 # $output .= $aa ; | |
| 1070 # } | |
| 1071 # } | |
| 1072 # #if( substr($output,-1,1) eq $stop ) { | |
| 1073 # # chop $output; | |
| 1074 # #} | |
| 1075 # | |
| 1076 # return ($output); | |
| 1077 #} | |
| 1078 | |
| 1079 =head2 gene | |
| 1080 | |
| 1081 Title : gene | |
| 1082 Usage : my $gene=$obj->gene; | |
| 1083 Function: Gets or sets the reference to the LiveSeq::Gene object. | |
| 1084 Objects that are features of a LiveSeq Gene will have this | |
| 1085 attribute set automatically. | |
| 1086 | |
| 1087 Returns : reference to an object of class Gene | |
| 1088 Note : if Gene object is not set, this method will return 0; | |
| 1089 Args : none or reference to object of class Bio::LiveSeq::Gene | |
| 1090 | |
| 1091 =cut | |
| 1092 | |
| 1093 sub gene { | |
| 1094 my ($self,$value) = @_; | |
| 1095 if (defined $value) { | |
| 1096 $self->{'gene'} = $value; | |
| 1097 } | |
| 1098 unless (exists $self->{'gene'}) { | |
| 1099 return (0); | |
| 1100 } else { | |
| 1101 return $self->{'gene'}; | |
| 1102 } | |
| 1103 } | |
| 1104 | |
| 1105 =head2 obj_valid | |
| 1106 | |
| 1107 Title : obj_valid | |
| 1108 Usage : if ($obj->obj_valid) {do something;} | |
| 1109 Function: Checks if start and end labels are still valid for the ojbect, | |
| 1110 i.e. tests if the LiveSeq object is still valid | |
| 1111 Returns : boolean | |
| 1112 Args : none | |
| 1113 | |
| 1114 =cut | |
| 1115 | |
| 1116 sub obj_valid { | |
| 1117 my $self=shift; | |
| 1118 unless (($self->{'seq'}->valid($self->start()))&&($self->{'seq'}->valid($self->end()))) { | |
| 1119 return (0); | |
| 1120 } | |
| 1121 return (1); | |
| 1122 } | |
| 1123 | |
| 1124 =head2 name | |
| 1125 | |
| 1126 Title : name | |
| 1127 Usage : $name = $obj->name; | |
| 1128 : $name = $obj->name("ABCD"); | |
| 1129 Function: Returns or sets the name of the object. | |
| 1130 If there is no name, it will return "unknown"; | |
| 1131 Returns : A string | |
| 1132 Args : None | |
| 1133 | |
| 1134 =cut | |
| 1135 | |
| 1136 sub name { | |
| 1137 my ($self,$value) = @_; | |
| 1138 if (defined $value) { | |
| 1139 $self->{'name'} = $value; | |
| 1140 } | |
| 1141 unless (exists $self->{'name'}) { | |
| 1142 return "unknown"; | |
| 1143 } else { | |
| 1144 return $self->{'name'}; | |
| 1145 } | |
| 1146 } | |
| 1147 | |
| 1148 =head2 desc | |
| 1149 | |
| 1150 Title : desc | |
| 1151 Usage : $desc = $obj->desc; | |
| 1152 : $desc = $obj->desc("ABCD"); | |
| 1153 Function: Returns or sets the description of the object. | |
| 1154 If there is no description, it will return "unknown"; | |
| 1155 Returns : A string | |
| 1156 Args : None | |
| 1157 | |
| 1158 =cut | |
| 1159 | |
| 1160 sub desc { | |
| 1161 my ($self,$value) = @_; | |
| 1162 if (defined $value) { | |
| 1163 $self->{'desc'} = $value; | |
| 1164 } | |
| 1165 unless (exists $self->{'desc'}) { | |
| 1166 return "unknown"; | |
| 1167 } else { | |
| 1168 return $self->{'desc'}; | |
| 1169 } | |
| 1170 } | |
| 1171 | |
| 1172 =head2 source | |
| 1173 | |
| 1174 Title : source | |
| 1175 Usage : $name = $obj->source; | |
| 1176 : $name = $obj->source("Homo sapiens"); | |
| 1177 Function: Returns or sets the organism that is source of the object. | |
| 1178 If there is no source, it will return "unknown"; | |
| 1179 Returns : A string | |
| 1180 Args : None | |
| 1181 | |
| 1182 =cut | |
| 1183 | |
| 1184 sub source { | |
| 1185 my ($self,$value) = @_; | |
| 1186 if (defined $value) { | |
| 1187 $self->{'source'} = $value; | |
| 1188 } | |
| 1189 unless (exists $self->{'source'}) { | |
| 1190 return "unknown"; | |
| 1191 } else { | |
| 1192 return $self->{'source'}; | |
| 1193 } | |
| 1194 } | |
| 1195 | |
| 1196 sub delete_Obj { | |
| 1197 my $self = shift; | |
| 1198 my @values= values %{$self}; | |
| 1199 my @keys= keys %{$self}; | |
| 1200 | |
| 1201 foreach my $key ( @keys ) { | |
| 1202 delete $self->{$key}; | |
| 1203 } | |
| 1204 foreach my $value ( @values ) { | |
| 1205 if (index(ref($value),"LiveSeq") != -1) { # object case | |
| 1206 eval { | |
| 1207 # delete $self->{$value}; | |
| 1208 $value->delete_Obj; | |
| 1209 }; | |
| 1210 } elsif (index(ref($value),"ARRAY") != -1) { # array case | |
| 1211 my @array=@{$value}; | |
| 1212 my $element; | |
| 1213 foreach $element (@array) { | |
| 1214 eval { | |
| 1215 $element->delete_Obj; | |
| 1216 }; | |
| 1217 } | |
| 1218 } elsif (index(ref($value),"HASH") != -1) { # object case | |
| 1219 my %hash=%{$value}; | |
| 1220 my $element; | |
| 1221 foreach $element (%hash) { | |
| 1222 eval { | |
| 1223 $element->delete_Obj; | |
| 1224 }; | |
| 1225 } | |
| 1226 } | |
| 1227 } | |
| 1228 return(1); | |
| 1229 } | |
| 1230 | |
| 1231 1; | 
