comparison variant_effect_predictor/Bio/LiveSeq/Transcript.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Transcript.pm,v 1.17 2002/09/25 08:58:23 heikki Exp $
2 #
3 # bioperl module for Bio::LiveSeq::Transcript
4 #
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
6 #
7 # Copyright Joseph Insana
8 #
9 # You may distribute this module under the same terms as perl itself
10 #
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::LiveSeq::Transcript - Transcript class for LiveSeq
16
17 =head1 SYNOPSIS
18
19 # documentation needed
20
21 =head1 DESCRIPTION
22
23 This stores informations about coding sequences (CDS).
24 The implementation is that a Transcript object accesses a collection of
25 Exon objects, inferring from them the nucleotide structure and sequence.
26
27 =head1 AUTHOR - Joseph A.L. Insana
28
29 Email: Insana@ebi.ac.uk, jinsana@gmx.net
30
31 Address:
32
33 EMBL Outstation, European Bioinformatics Institute
34 Wellcome Trust Genome Campus, Hinxton
35 Cambs. CB10 1SD, United Kingdom
36
37 =head1 APPENDIX
38
39 The rest of the documentation details each of the object
40 methods. Internal methods are usually preceded with a _
41
42 =cut
43
44 # Let the code begin...
45
46 package Bio::LiveSeq::Transcript;
47 $VERSION=5.2;
48
49 # Version history:
50 # Tue Mar 21 14:38:02 GMT 2000 v 1.0 begun
51 # Tue Mar 21 17:45:31 GMT 2000 v 1.1 new() created
52 # Wed Mar 22 19:40:13 GMT 2000 v 1.4 all_Exons() seq(), length(), all_labels()
53 # Thu Mar 23 19:08:36 GMT 2000 v 1.5 follows() replaces is_downstream()
54 # Thu Mar 23 20:59:02 GMT 2000 v 2.0 valid _inside_position label position
55 # Fri Mar 24 18:33:18 GMT 2000 v 2.2 rewritten position(), now should work with diverse coordinate_starts
56 # Sat Mar 25 04:08:18 GMT 2000 v 2.21 added firstlabel to position and label so that Translation can exploit it
57 # Sat Mar 25 06:39:27 GMT 2000 v 2.3 started override of subseq, works just internally
58 # Mon Mar 27 19:05:15 BST 2000 v 2.4 subseq finished, it works with coord_start
59 # Fri Mar 31 18:48:07 BST 2000 v 2.5 started downstream_seq()
60 # Mon Apr 3 17:37:34 BST 2000 v 2.52 upstream_seq added
61 # Fri Apr 7 03:29:43 BST 2000 v 2.6 up/downstream now can use Gene information
62 # Sat Apr 8 12:59:58 BST 2000 v 3.0 all_Exons now skips no more valid exons
63 # Sat Apr 8 13:32:08 BST 2000 v 3.1 get_Translation added
64 # Wed Apr 12 12:37:08 BST 2000 v 3.2 all_Exons updates Transcript's start/end
65 # Wed Apr 12 12:41:22 BST 2000 v 3.3 each Exon has "transcript" attribute added
66 # Wed Apr 12 16:35:56 BST 2000 v 3.4 started coding _deletecheck
67 # Wed Apr 12 23:40:19 BST 2000 v 3.5 start and end redefined here, no more checks after deletion to refix start/end attributes. And no need of those. Eliminated hence from new()
68 # Wed Apr 12 23:47:02 BST 2000 v 3.9 finished _deletecheck, debugging starts
69 # Thu Apr 13 00:37:16 BST 2000 v 4.0 debugging done: seems working OK
70 # Thu Apr 27 16:18:55 BST 2000 v 4.1 translation_table added
71 # Tue May 16 17:57:40 BST 2000 v 4.11 corrected bug in docs of downstream_seq
72 # Wed May 17 16:48:34 BST 2000 v 4.2 frame() added
73 # Mon May 22 15:22:12 BST 2000 v 4.21 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!)
74 # Thu Jun 22 20:02:39 BST 2000 v 4.3 valid() moved to SeqI, to be inherited as the general one
75 # Thu Jun 22 20:27:57 BST 2000 v 4.4 optimized labelsubseq coded!
76 # Thu Jun 22 21:17:51 BST 2000 v 4.44 in_which_Exon() added
77 # Sat Jun 24 00:49:55 BST 2000 v 4.5 new subseq() that exploits the new fast labelsubseq
78 # Thu Jun 29 16:31:19 BST 2000 v 5.0 downsream_seq and upstream_seq recoded so that if entry is RNA it will return sequences up to the entry limits -> it should be properly debugged, expecially the upstream_seq one
79 # Wed Jul 12 04:01:53 BST 2000 v 5.1 croak -> carp+return(-1)
80 # Wed Mar 28 15:16:21 BST 2001 v 5.2 carp -> warn,throw (coded methods in SeqI)
81
82 use strict;
83 # use Carp qw(carp cluck);
84 use vars qw($VERSION @ISA);
85 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it
86 use Bio::LiveSeq::Exon 1.0; # uses Exon to create new exon in case of deletion
87 @ISA=qw(Bio::LiveSeq::SeqI);
88
89 =head2 new
90
91 Title : new
92 Usage : $transcript = Bio::LiveSeq::Transcript->new(-exons => \@obj_refs);
93
94 Function: generates a new Bio::LiveSeq::Transcript
95 Returns : reference to a new object of class Transcript
96 Errorcode -1
97 Args : reference to an array of Exon object references
98
99 =cut
100
101 sub new {
102 my ($thing, %args) = @_;
103 my $class = ref($thing) || $thing;
104 my ($obj,%transcript);
105
106 my @exons=@{$args{-exons}};
107
108 $obj = \%transcript;
109 $obj = bless $obj, $class;
110
111 unless (@exons) {
112 $obj->warn("$class not initialised because exons array empty");
113 return(-1);
114 }
115
116 # now useless, after start and end methods have been overridden here
117 my $firstexon = $exons[0];
118 #my $lastexon = $exons[-1];
119 #my $start = $firstexon->start;
120 #my $end = $lastexon->end;
121 my $strand = $firstexon->strand;
122 my $seq = $firstexon->{'seq'};
123 $obj->alphabet('rna');
124
125 unless (_checkexons(\@exons)) {
126 $obj->warn("$class not initialised because of problems in the exon structure");
127 return(-1);
128 }
129 $obj->{'strand'}=$strand;
130 $obj->{'exons'}=\@exons;
131 $obj->{'seq'}=$seq;
132
133 # set Transcript into each Exon
134 my $exon;
135 foreach $exon (@exons) {
136 $exon->{'transcript'}=$obj;
137 }
138 return $obj;
139 }
140
141
142 =head2 all_Exons
143
144 Title : all_Exons
145 Usage : $transcript_obj->all_Exons()
146 Function: returns references to all Exon objects the Transcript is composed of
147 Example : foreach $exon ($transcript->all_Exons()) { do_something }
148 Returns : array of object references
149 Args : none
150
151 =cut
152
153 sub all_Exons {
154 my $self=shift;
155 my $exonsref=$self->{'exons'};
156 my @exons=@{$exonsref};
157 my @newexons;
158 my $exon;
159 foreach $exon (@exons) {
160 unless ($exon->obj_valid) {
161 $self->warn("$exon no more valid, start or end label lost, skipping....",1); # ignorable
162 } else {
163 push(@newexons,$exon);
164 }
165 }
166 if ($#exons != $#newexons) {
167 # update exons field
168 $self->{'exons'}=\@newexons;
169 }
170 return (@newexons);
171 }
172
173 =head2 downstream_seq
174
175 Title : downstream_seq
176 Usage : $transcript_obj->downstream_seq()
177 : $transcript_obj->downstream_seq(64)
178 Function: returns a string of nucleotides downstream of the end of the
179 CDS. If there is some information of the real mRNA, from features in
180 an attached Gene object, it will return up to those boundaries.
181 Otherwise it will return 1000 nucleotides.
182 If an argument is given it will override the default 1000 number
183 and return instead /that/ requested number of nucleotides.
184 But if a Gene object is attached, this argument will be ignored.
185 Returns : string
186 Args : an optional integer number of nucleotides to be returned instead of
187 the default if no gene attached
188
189 =cut
190
191 sub downstream_seq {
192 my ($self,$howmany)=@_;
193 my $str;
194 if (defined ($howmany)) {
195 unless ($howmany > 0) {
196 $self->throw("No sense in asking less than 1 downstream nucleotides!");
197 }
198 } else {
199 unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve until the end
200 #$str=$DNAobj->labelsubseq($self->end,undef,undef,"unsecuremoderequested");
201 #return(substr($str,1)); # delete first nucleotide that is the last of Transcript
202 if ($self->gene) { # if there is Gene object attached fetch relevant info
203 $str=$self->{'seq'}->labelsubseq($self->end,undef,$self->gene->maxtranscript->end); # retrieve from end of this Transcript to end of the maxtranscript
204 $str=substr($str,1); # delete first nucleotide that is the last of Transcript
205 if (CORE::length($str) > 0) {
206 return($str);
207 } else { # if there was no downstream through the gene's maxtranscript, go the usual way
208 $howmany = 1000;
209 }
210 } else {
211 $howmany = 1000;
212 }
213 }
214 }
215 my @exons=$self->all_Exons;
216 my $strand=$self->strand();
217 my $lastexon=$exons[-1];
218 my $lastexonlength=$lastexon->length;
219 # $howmany nucs after end of last exon
220 #my $downstream_seq=$lastexon->subseq($lastexonlength+1,undef,$howmany);
221 my $downstream_seq;
222
223 if ($howmany) {
224 $downstream_seq=substr($lastexon->labelsubseq($self->end,$howmany,undef,"unsecuremoderequested"),1);
225 } else {
226 if ($strand == 1) {
227 $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->end,"unsecuremoderequested"),1);
228 } else {
229 $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->start,"unsecuremoderequested"),1);
230 }
231 }
232 return $downstream_seq;
233 }
234
235 =head2 upstream_seq
236
237 Title : upstream_seq
238 Usage : $transcript_obj->upstream_seq()
239 : $transcript_obj->upstream_seq(64)
240 Function: just like downstream_seq but returns nucleotides before the ATG
241 Note : the default, if no Gene information present and no nucleotides
242 number given, is to return up to 400 nucleotides.
243
244 =cut
245
246 sub upstream_seq {
247 my ($self,$howmany)=@_;
248 if (defined ($howmany)) {
249 unless ($howmany > 0) {
250 $self->throw("No sense in asking less than 1 upstream nucleotides!");
251 }
252 } else {
253 unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve from the start
254 if ($self->gene) { # if there is Gene object attached fetch relevant info
255 my $str=$self->{'seq'}->labelsubseq($self->gene->maxtranscript->start,undef,$self->start); # retrieve from start of maxtranscript to start of this Transcript
256 chop $str; # delete last nucleotide that is the A of starting ATG
257 if (length($str) > 0) {
258 return($str);
259 } else { # if there was no upstream through the gene's maxtranscript, go the usual way
260 $howmany = 400;
261 }
262 } else {
263 $howmany = 400;
264 }
265 }
266 }
267 my @exons=$self->all_Exons;
268 my $firstexon=$exons[0];
269
270 my $upstream_seq;
271 my $strand=$self->strand();
272
273 if ($howmany) {# $howmany nucs before begin of first exon
274 my $labelbefore=$firstexon->label(-$howmany,$firstexon->start);
275 if ($labelbefore < 1) {
276 if ($strand == 1) {
277 $labelbefore=$self->{'seq'}->start;
278 } else {
279 $labelbefore=$self->{'seq'}->end;
280 }
281 }
282 $upstream_seq=$firstexon->labelsubseq($labelbefore,undef,$firstexon->start,"unsecuremoderequested");
283 chop $upstream_seq;
284 } else {
285 if ($strand == 1) {
286 $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->start,undef,$self->start,"unsecuremoderequested");
287 chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
288 } else {
289 $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->end,undef,$self->start,"unsecuremoderequested");
290 chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
291 }
292 }
293 return $upstream_seq;
294 }
295
296 # These get redefined here, overriding the SeqI one because they draw their
297 # information from the Exons a Transcript is built of
298 # optional argument: firstlabel. If not given, it checks coordinate_start
299 # This is useful when called by Translation
300 # also used by _delete
301 sub label {
302 my ($self,$position,$firstlabel)=@_;
303 unless ($position) { # if position = 0 complain ?
304 $self->warn("Position not given or position 0");
305 return (-1);
306 }
307 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
308 my ($label,@labels,$length,$arraypos);
309 unless (defined ($firstlabel)) {
310 $firstlabel=$self->coordinate_start; # this is inside Transcript obj
311 }
312 my $coord_pos=$self->_inside_position($firstlabel);
313 $length=$self->length;
314 #if ($strand == 1) {
315 if ($position < 1) {
316 $position++; # to account for missing of 0 position
317 }
318 $arraypos=$position+$coord_pos-2;
319 #print "\n=-=-=-=-DEBUG: arraypos $arraypos, pos $position, coordpos: $coord_pos";
320 if ($arraypos < 0) {
321 $label=$self->{'seq'}->label($arraypos,$start,$strand); #?
322 } elsif ($arraypos >= $length) {
323 $label=$self->{'seq'}->label($arraypos-$length+2,$end,$strand); #?
324 } else { # inside the Transcript
325 @labels=$self->all_labels;
326 $label=$labels[$arraypos];
327 }
328 #}
329 }
330
331 # argument: label
332 # returns: position of label according to coord_start
333 # errorcode: 0 label not found
334 # optional argument: firstlabel. If not given, it checks coordinate_start
335 # This is useful when called by Translation
336 sub position {
337 my ($self,$label,$firstlabel)=@_;
338 unless ($self->{'seq'}->valid($label)) {
339 $self->warn("label is not valid");
340 return (0);
341 }
342 unless (defined ($firstlabel)) {
343 $firstlabel=$self->coordinate_start; # this is inside Transcript obj
344 }
345 if ($label == $firstlabel) {
346 return (1);
347 }
348 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
349 my ($position,$in_pos,$out_pos,$coord_pos);
350 my $length=$self->length;
351 $coord_pos=$self->_inside_position($firstlabel);
352 if ($self->valid($label)) { # if label is inside the Transcript
353 $in_pos=$self->_inside_position($label);
354 $position=$in_pos-$coord_pos+1;
355 if ($position <= 0) {
356 return ($position-1); # accounts for the missing of the 0 position
357 }
358 } else {
359 if ($self->follows($end,$label)) { # label after end of transcript
360 $out_pos=$self->{'seq'}->position($label,$end,$strand);
361 #print "\n+++++++++DEBUG label $label FOLLOWS end $end outpos $out_pos coordpos $coord_pos";
362 $position=$out_pos+$length-$coord_pos;
363 } elsif ($self->follows($label,$start)) { # label before begin of transcript
364 #print "\n+++++++++DEBUG label $label BEFORE start $start outpos $out_pos coordpos $coord_pos";
365 $out_pos=$self->{'seq'}->position($label,$start,$strand);
366 $position=$out_pos-$coord_pos+1;
367 } else { # label is in intron (not valid, not after, not before)!
368 $self->warn("Cannot give position of label pointing to intron according to CDS numbering!",1);
369 return (0);
370 }
371 }
372 return ($position);
373 }
374
375 sub seq {
376 my $self=shift;
377 my ($exon,$str);
378 my @exons=$self->all_Exons();
379 foreach $exon (@exons) {
380 $str .= $exon->seq();
381 }
382 return $str;
383 }
384
385 sub length {
386 my $self=shift;
387 my ($exon,$length);
388 my @exons=$self->all_Exons();
389 foreach $exon (@exons) {
390 $length += $exon->length();
391 }
392 return $length;
393 }
394
395 sub all_labels {
396 my $self=shift;
397 my ($exon,@labels);
398 my @exons=$self->all_Exons();
399 foreach $exon (@exons) {
400 push (@labels,$exon->all_labels());
401 }
402 return @labels;
403 }
404
405 # redefined here so that it will retrieve effective subseq without introns
406 # otherwise it would have retrieved an underlying DNA (possibly with introns)
407 # subsequence
408 # Drawback: this is really bulky, label->position and then a call to
409 # subseq that will do the opposite position-> label
410 #
411 # one day this can be rewritten as the main one so that the normal subseq
412 # will rely on this one and hence avoid this double (useless and lengthy)
413 # conversion between labels and positions
414 sub old_labelsubseq {
415 my ($self,$start,$length,$end)=@_;
416 my ($pos1,$pos2);
417 if ($start) {
418 unless ($self->valid($start)) {
419 $self->warn("Start label not valid"); return (-1);
420 }
421 $pos1=$self->position($start);
422 }
423 if ($end) {
424 if ($end == $start) {
425 $length=1;
426 } else {
427 unless ($self->valid($end)) {
428 $self->warn("End label not valid"); return (-1);
429 }
430 unless ($self->follows($start,$end) == 1) {
431 $self->warn("End label does not follow Start label!"); return (-1);
432 }
433 $pos2=$self->position($end);
434 undef $length;
435 }
436 }
437 return ($self->subseq($pos1,$pos2,$length));
438 }
439
440 # rewritten, eventually
441
442 sub labelsubseq {
443 my ($self,$start,$length,$end,$unsecuremode)=@_;
444 unless (defined $unsecuremode &&
445 $unsecuremode eq "unsecuremoderequested")
446 { # to skip security checks (faster)
447 if ($start) {
448 unless ($self->valid($start)) {
449 $self->warn("Start label not valid"); return (-1);
450 }
451 } else {
452 $start=$self->start;
453 }
454 if ($end) {
455 if ($end == $start) {
456 $length=1;
457 undef $end;
458 } else {
459 undef $length; # end argument overrides length argument
460 unless ($self->valid($end)) {
461 $self->warn("End label not valid"); return (-1);
462 }
463 unless ($self->follows($start,$end) == 1) {
464 $self->warn("End label does not follow Start label!"); return (-1);
465 }
466 }
467 } else {
468 $end=$self->end;
469 }
470 }
471 my ($seq,$exon,$startexon,$endexon); my @exonlabels;
472 my @exons=$self->all_Exons;
473 EXONCHECK:
474 foreach $exon (@exons) {
475 if ((!(defined($startexon)))&&($exon->valid($start))) { # checks only if not yet found
476 $startexon=$exon;
477 }
478 if ($exon->valid($end)) {
479 $endexon=$exon;
480 }
481 if ((!(defined($seq)) && (defined($startexon)))) { # initializes only once
482 if ((defined($endexon)) && ($endexon eq $startexon)) { # then perfect, we are finished
483 if ($length) {
484 $seq = $startexon->labelsubseq($start,$length,undef,"unsecuremoderequested");
485
486
487 last EXONCHECK;
488 } else {
489 $seq = $startexon->labelsubseq($start,undef,$end,"unsecuremoderequested");
490 }
491 last EXONCHECK;
492 } else { # get up to the end of the exon
493 $seq = $startexon->labelsubseq($start,undef,undef,"unsecuremoderequested");
494 }
495 }
496 if (($startexon)&&($exon ne $startexon)) {
497 if (defined($endexon)) { # we arrived to the last exon
498 $seq .= $endexon->labelsubseq(undef,undef,$end,"unsecuremoderequested"); # get from the start of the exon
499 last EXONCHECK;
500
501 } elsif (defined($startexon)) { # we are in a whole-exon-in-the-middle case
502 $seq .= $exon->seq; # we add it completely to the seq
503 } # else, we still have to reach the start point, exon useless, we move on
504 if ($length) { # if length argument specified
505 if (($seq && (CORE::length($seq) >= $length))) {
506 last EXONCHECK;
507 }
508 }
509 }
510 }
511 if ($length) {
512 return (substr($seq,0,$length));
513 } else {
514 return ($seq);
515 }
516 }
517
518
519 # argument: label
520 # returns: the objref and progressive number of the Exon containing that label
521 # errorcode: -1
522 sub in_which_Exon {
523 my ($self,$label)=@_;
524 my ($count,$exon);
525 my @exons=$self->all_Exons;
526 foreach $exon (@exons) {
527 $count++; # 1st exon is numbered "1"
528 if ($exon->valid($label)) {
529 return ($exon,$count)
530 }
531 }
532 return (-1); # if nothing found
533 }
534
535 # recoded to exploit the new fast labelsubseq()
536 # valid only inside Transcript
537 sub subseq {
538 my ($self,$pos1,$pos2,$length) = @_;
539 my ($str,$startlabel,$endlabel);
540 if (defined ($pos1)) {
541 if ($pos1 == 0) { # if position = 0 complain
542 $self->warn("Position cannot be 0!"); return (-1);
543 }
544 if ((defined ($pos2))&&($pos1>$pos2)) {
545 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
546 }
547 $startlabel=$self->label($pos1);
548 unless ($self->valid($startlabel)) {
549 $self->warn("Start label not valid"); return (-1);
550 }
551 if ($startlabel < 1) {
552 $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
553 }
554 } else {
555 $startlabel=$self->start;
556 }
557 if (defined ($pos2)) {
558 if ($pos2 == 0) { # if position = 0 complain
559 $self->warn("Position cannot be 0!"); return (-1);
560 }
561 undef $length;
562 if ((defined ($pos1))&&($pos1>$pos2)) {
563 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
564 }
565 $endlabel=$self->label($pos2);
566 unless ($self->valid($endlabel)) {
567 $self->warn("End label not valid"); return (-1);
568 }
569 if ($endlabel < 1) {
570 $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
571 }
572 } else {
573 unless (defined ($length)) {
574 $endlabel=$self->end;
575 }
576 }
577 return ($self->labelsubseq($startlabel,$length,$endlabel,"unsecuremoderequested"));
578 }
579
580 # works only inside the transcript, complains if asked outside
581 sub old_subseq {
582 my ($self,$pos1,$pos2,$length) = @_;
583 my ($str,$startcount,$endcount,$seq,$seqlength);
584 if (defined ($length)) {
585 if ($length < 1) {
586 $self->warn("No sense asking for a subseq of length < 1");
587 return (-1);
588 }
589 }
590 my $firstlabel=$self->coordinate_start; # this is inside Transcript obj
591 my $coord_pos=$self->_inside_position($firstlabel); # TESTME old
592 $seq=$self->seq;
593 $seqlength=CORE::length($seq);
594 unless (defined ($pos1)) {
595 $startcount=1+$coord_pos-1; # i.e. coord_pos
596 } else {
597 if ($pos1 == 0) { # if position = 0 complain
598 $self->warn("Position cannot be 0!"); return (-1);
599 } elsif ($pos1 < 0) {
600 $pos1++;
601 }
602 if ((defined ($pos2))&&($pos1>$pos2)) {
603 $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
604 return (-1);
605 }
606 $startcount=$pos1+$coord_pos-1;
607 }
608 unless (defined ($pos2)) {
609 ;
610 } else {
611 if ($pos2 == 0) { # if position = 0 complain
612 $self->warn("Position cannot be 0!"); return (-1);
613 } elsif ($pos2 < 0) {
614 $pos2++;
615 }
616 if ((defined ($pos1))&&($pos1>$pos2)) {
617 $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
618 return (-1);
619 }
620 $endcount=$pos2+$coord_pos-1;
621 if ($endcount > $seqlength) {
622 #print "\n###DEBUG###: pos1 $pos1 pos2 $pos2 coordpos $coord_pos endcount $endcount seqln $seqlength\n";
623 $self->warn("Cannot access end position after the end of Transcript");
624 return (-1);
625 }
626 $length=$endcount-$startcount+1;
627 }
628 #print "\n###DEBUG pos1 $pos1 pos2 $pos2 start $startcount end $endcount length $length coordpos $coord_pos\n";
629 my $offset=$startcount-1;
630 if ($offset < 0) {
631 $self->warn("Cannot access startposition before the beginning of Transcript, returning from start",1); # ignorable
632 return (substr($seq,0,$length));
633 } elsif ($offset >= $seqlength) {
634 $self->warn("Cannot access startposition after the end of Transcript");
635 return (-1);
636 } else {
637 $str=substr($seq,$offset,$length);
638 if (CORE::length($str) < $length) {
639 $self->warn("Attention, cannot return the length requested ".
640 "for subseq",1) if $self->verbose > 0; # ignorable
641 }
642 return $str;
643 }
644 }
645
646 # redefined so that it doesn't require other methods (after deletions) to
647 # reset it.
648 sub start {
649 my $self = shift;
650 my $exonsref=$self->{'exons'};
651 my @exons=@{$exonsref};
652 return ($exons[0]->start);
653 }
654
655 sub end {
656 my $self = shift;
657 my $exonsref=$self->{'exons'};
658 my @exons=@{$exonsref};
659 return ($exons[-1]->end);
660 }
661
662
663 # internal methods begin here
664
665 # returns: position of label in transcript's all_labels
666 # with STARTlabel == 1
667 # errorcode 0 -> label not found
668 # argument: label
669 sub _inside_position {
670 my ($self,$label)=@_;
671 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
672 my ($position,$checkme);
673 my @labels=$self->all_labels;
674 foreach $checkme (@labels) {
675 $position++;
676 if ($label == $checkme) {
677 return ($position);
678 }
679 }
680 return (0);
681 }
682
683 # returns 1 OK or 0 ERROR
684 # arguments: reference to array of Exon object references
685 sub _checkexons {
686 my ($exon,$thisstart);
687 my $self=$exon;
688 my $exonsref=$_[0];
689 my @exons=@{$exonsref};
690
691 my $firstexon = $exons[0];
692
693 unless (ref($firstexon) eq "Bio::LiveSeq::Exon") {
694 $self->warn("Object not of class Exon");
695 return (0);
696 }
697 my $strand = $firstexon->strand;
698
699 my $prevend = $firstexon->end;
700 shift @exons; # skip first one
701 foreach $exon (@exons) {
702 unless (ref($exon) eq "Bio::LiveSeq::Exon") { # object class check
703 $self->warn("Object not of class Exon");
704 return (0);
705 }
706 if ($exon->strand != $strand) { # strand consistency check
707 $self->warn("Exons' strands not consistent when trying to create Transcript");
708 return (0);
709 }
710 $thisstart = $exon->start;
711 unless ($exon->{'seq'}->follows($prevend,$thisstart,$strand)) {
712 $self->warn("Exons not in correct order when trying to create Transcript");
713 return (0);
714 }
715 $prevend = $exon->end;
716 }
717 return (1);
718 }
719
720 =head2 get_Translation
721
722 Title : valid
723 Usage : $translation = $obj->get_Translation()
724 Function: retrieves the reference to the object of class Translation (if any)
725 attached to a LiveSeq object
726 Returns : object reference
727 Args : none
728
729 =cut
730
731 sub get_Translation {
732 my $self=shift;
733 return ($self->{'translation'}); # this is set when Translation->new is called
734 }
735
736 # this checks so that deletion spanning multiple exons is
737 # handled accordingly and correctly
738 # arguments: begin and end label of a deletion
739 # this is called BEFORE any deletion in the chain
740 sub _deletecheck {
741 my ($self,$startlabel,$endlabel)=@_;
742 my $exonsref=$self->{'exons'};
743 my @exons=@{$exonsref};
744 my ($startexon,$endexon,$exon);
745 $startexon=$endexon=0;
746 foreach $exon (@exons) {
747 if (($startexon == 0)&&($exon->valid($startlabel))) {
748 $startexon=$exon; # exon containing start of deletion
749 }
750 if (($endexon == 0)&&($exon->valid($endlabel))) {
751 $endexon=$exon; # exon containing end of deletion
752 }
753 if (($startexon)&&($endexon)) {
754 last; # don't check further
755 }
756 }
757 my $nextend=$self->label(2,$endlabel); # retrieve the next label
758 my $prevstart=$self->label(-1,$startlabel); # retrieve the prev label
759
760 if ($startexon eq $endexon) { # intra-exon deletion
761 if (($startexon->start eq $startlabel) && ($startexon->end eq $endlabel)) {
762 # let's delete the entire exon
763 my @newexons;
764 foreach $exon (@exons) {
765 unless ($exon eq $startexon) {
766 push(@newexons,$exon);
767 }
768 }
769 $self->{'exons'}=\@newexons;
770 } elsif ($startexon->start eq $startlabel) { # special cases
771 $startexon->{'start'}=$nextend; # set a new start of exon
772 } elsif ($startexon->end eq $endlabel) {
773 $startexon->{'end'}=$prevstart; # set a new end of exon
774 } else {
775 return; # no problem
776 }
777 } else { # two new exons to be created, inter-exons deletion
778 my @newexons;
779 my $exonobj;
780 my $dna=$self->{'seq'};
781 my $strand=$self->strand;
782 my $notmiddle=1; # flag for skipping exons in the middle of deletion
783 foreach $exon (@exons) {
784 if ($exon eq $startexon) {
785 $exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$exon->start,'-end'=>$prevstart,'-strand'=>$strand); # new partial exon
786 push(@newexons,$exonobj);
787 $notmiddle=0; # now we enter totally deleted exons
788 } elsif ($exon eq $endexon) {
789 $exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$nextend,'-end'=>$exon->end,'-strand'=>$strand); # new partial exon
790 push(@newexons,$exonobj);
791 $notmiddle=1; # exiting totally deleted exons
792 } else {
793 if ($notmiddle) { # if before or after exons with deletion
794 push(@newexons,$exon);
795 }# else skip them
796 }
797 }
798 $self->{'exons'}=\@newexons;
799 }
800 }
801
802 =head2 translation_table
803
804 Title : translation_table
805 Usage : $name = $obj->translation_table;
806 : $name = $obj->translation_table(11);
807 Function: Returns or sets the translation_table used for translating the
808 transcript.
809 If it has never been set, it will return undef.
810 Returns : an integer
811
812 =cut
813
814 sub translation_table {
815 my ($self,$value) = @_;
816 if (defined $value) {
817 $self->{'translation_table'} = $value;
818 }
819 unless (exists $self->{'translation_table'}) {
820 return (undef);
821 } else {
822 return $self->{'translation_table'};
823 }
824 }
825
826 =head2 frame
827
828 Title : frame
829 Usage : $frame = $transcript->frame($label);
830 Function: Returns the frame of a particular nucleotide.
831 Frame can be 0 1 or 2 and means the position in the codon triplet
832 of the particulat nucleotide. 0 is the first codon_position.
833 Codon_position (1 2 3) is simply frame+1.
834 If the label asked for is not inside the Transcript, -1 will be
835 returned.
836 Args : a label
837 Returns : 0 1 or 2
838 Errorcode -1
839
840 =cut
841
842 # args: label
843 # returns: frame of nucleotide (0 1 2)
844 # errorcode: -1
845 sub frame {
846 my ($self,$inputlabel)=@_;
847 my @labels=$self->all_labels;
848 my ($label,$frame,$count);
849 foreach $label (@labels) {
850 if ($inputlabel == $label) {
851 return ($count % 3);
852 }
853 $count++; # 0 1 2 3 4....
854 }
855 return (-1); # label not found amid Transcript labels
856 }
857
858 1;