Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/LiveSeq/Transcript.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: Transcript.pm,v 1.17 2002/09/25 08:58:23 heikki Exp $ | |
2 # | |
3 # bioperl module for Bio::LiveSeq::Transcript | |
4 # | |
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> | |
6 # | |
7 # Copyright Joseph Insana | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 # | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::LiveSeq::Transcript - Transcript class for LiveSeq | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 # documentation needed | |
20 | |
21 =head1 DESCRIPTION | |
22 | |
23 This stores informations about coding sequences (CDS). | |
24 The implementation is that a Transcript object accesses a collection of | |
25 Exon objects, inferring from them the nucleotide structure and sequence. | |
26 | |
27 =head1 AUTHOR - Joseph A.L. Insana | |
28 | |
29 Email: Insana@ebi.ac.uk, jinsana@gmx.net | |
30 | |
31 Address: | |
32 | |
33 EMBL Outstation, European Bioinformatics Institute | |
34 Wellcome Trust Genome Campus, Hinxton | |
35 Cambs. CB10 1SD, United Kingdom | |
36 | |
37 =head1 APPENDIX | |
38 | |
39 The rest of the documentation details each of the object | |
40 methods. Internal methods are usually preceded with a _ | |
41 | |
42 =cut | |
43 | |
44 # Let the code begin... | |
45 | |
46 package Bio::LiveSeq::Transcript; | |
47 $VERSION=5.2; | |
48 | |
49 # Version history: | |
50 # Tue Mar 21 14:38:02 GMT 2000 v 1.0 begun | |
51 # Tue Mar 21 17:45:31 GMT 2000 v 1.1 new() created | |
52 # Wed Mar 22 19:40:13 GMT 2000 v 1.4 all_Exons() seq(), length(), all_labels() | |
53 # Thu Mar 23 19:08:36 GMT 2000 v 1.5 follows() replaces is_downstream() | |
54 # Thu Mar 23 20:59:02 GMT 2000 v 2.0 valid _inside_position label position | |
55 # Fri Mar 24 18:33:18 GMT 2000 v 2.2 rewritten position(), now should work with diverse coordinate_starts | |
56 # Sat Mar 25 04:08:18 GMT 2000 v 2.21 added firstlabel to position and label so that Translation can exploit it | |
57 # Sat Mar 25 06:39:27 GMT 2000 v 2.3 started override of subseq, works just internally | |
58 # Mon Mar 27 19:05:15 BST 2000 v 2.4 subseq finished, it works with coord_start | |
59 # Fri Mar 31 18:48:07 BST 2000 v 2.5 started downstream_seq() | |
60 # Mon Apr 3 17:37:34 BST 2000 v 2.52 upstream_seq added | |
61 # Fri Apr 7 03:29:43 BST 2000 v 2.6 up/downstream now can use Gene information | |
62 # Sat Apr 8 12:59:58 BST 2000 v 3.0 all_Exons now skips no more valid exons | |
63 # Sat Apr 8 13:32:08 BST 2000 v 3.1 get_Translation added | |
64 # Wed Apr 12 12:37:08 BST 2000 v 3.2 all_Exons updates Transcript's start/end | |
65 # Wed Apr 12 12:41:22 BST 2000 v 3.3 each Exon has "transcript" attribute added | |
66 # Wed Apr 12 16:35:56 BST 2000 v 3.4 started coding _deletecheck | |
67 # Wed Apr 12 23:40:19 BST 2000 v 3.5 start and end redefined here, no more checks after deletion to refix start/end attributes. And no need of those. Eliminated hence from new() | |
68 # Wed Apr 12 23:47:02 BST 2000 v 3.9 finished _deletecheck, debugging starts | |
69 # Thu Apr 13 00:37:16 BST 2000 v 4.0 debugging done: seems working OK | |
70 # Thu Apr 27 16:18:55 BST 2000 v 4.1 translation_table added | |
71 # Tue May 16 17:57:40 BST 2000 v 4.11 corrected bug in docs of downstream_seq | |
72 # Wed May 17 16:48:34 BST 2000 v 4.2 frame() added | |
73 # Mon May 22 15:22:12 BST 2000 v 4.21 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!) | |
74 # Thu Jun 22 20:02:39 BST 2000 v 4.3 valid() moved to SeqI, to be inherited as the general one | |
75 # Thu Jun 22 20:27:57 BST 2000 v 4.4 optimized labelsubseq coded! | |
76 # Thu Jun 22 21:17:51 BST 2000 v 4.44 in_which_Exon() added | |
77 # Sat Jun 24 00:49:55 BST 2000 v 4.5 new subseq() that exploits the new fast labelsubseq | |
78 # Thu Jun 29 16:31:19 BST 2000 v 5.0 downsream_seq and upstream_seq recoded so that if entry is RNA it will return sequences up to the entry limits -> it should be properly debugged, expecially the upstream_seq one | |
79 # Wed Jul 12 04:01:53 BST 2000 v 5.1 croak -> carp+return(-1) | |
80 # Wed Mar 28 15:16:21 BST 2001 v 5.2 carp -> warn,throw (coded methods in SeqI) | |
81 | |
82 use strict; | |
83 # use Carp qw(carp cluck); | |
84 use vars qw($VERSION @ISA); | |
85 use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it | |
86 use Bio::LiveSeq::Exon 1.0; # uses Exon to create new exon in case of deletion | |
87 @ISA=qw(Bio::LiveSeq::SeqI); | |
88 | |
89 =head2 new | |
90 | |
91 Title : new | |
92 Usage : $transcript = Bio::LiveSeq::Transcript->new(-exons => \@obj_refs); | |
93 | |
94 Function: generates a new Bio::LiveSeq::Transcript | |
95 Returns : reference to a new object of class Transcript | |
96 Errorcode -1 | |
97 Args : reference to an array of Exon object references | |
98 | |
99 =cut | |
100 | |
101 sub new { | |
102 my ($thing, %args) = @_; | |
103 my $class = ref($thing) || $thing; | |
104 my ($obj,%transcript); | |
105 | |
106 my @exons=@{$args{-exons}}; | |
107 | |
108 $obj = \%transcript; | |
109 $obj = bless $obj, $class; | |
110 | |
111 unless (@exons) { | |
112 $obj->warn("$class not initialised because exons array empty"); | |
113 return(-1); | |
114 } | |
115 | |
116 # now useless, after start and end methods have been overridden here | |
117 my $firstexon = $exons[0]; | |
118 #my $lastexon = $exons[-1]; | |
119 #my $start = $firstexon->start; | |
120 #my $end = $lastexon->end; | |
121 my $strand = $firstexon->strand; | |
122 my $seq = $firstexon->{'seq'}; | |
123 $obj->alphabet('rna'); | |
124 | |
125 unless (_checkexons(\@exons)) { | |
126 $obj->warn("$class not initialised because of problems in the exon structure"); | |
127 return(-1); | |
128 } | |
129 $obj->{'strand'}=$strand; | |
130 $obj->{'exons'}=\@exons; | |
131 $obj->{'seq'}=$seq; | |
132 | |
133 # set Transcript into each Exon | |
134 my $exon; | |
135 foreach $exon (@exons) { | |
136 $exon->{'transcript'}=$obj; | |
137 } | |
138 return $obj; | |
139 } | |
140 | |
141 | |
142 =head2 all_Exons | |
143 | |
144 Title : all_Exons | |
145 Usage : $transcript_obj->all_Exons() | |
146 Function: returns references to all Exon objects the Transcript is composed of | |
147 Example : foreach $exon ($transcript->all_Exons()) { do_something } | |
148 Returns : array of object references | |
149 Args : none | |
150 | |
151 =cut | |
152 | |
153 sub all_Exons { | |
154 my $self=shift; | |
155 my $exonsref=$self->{'exons'}; | |
156 my @exons=@{$exonsref}; | |
157 my @newexons; | |
158 my $exon; | |
159 foreach $exon (@exons) { | |
160 unless ($exon->obj_valid) { | |
161 $self->warn("$exon no more valid, start or end label lost, skipping....",1); # ignorable | |
162 } else { | |
163 push(@newexons,$exon); | |
164 } | |
165 } | |
166 if ($#exons != $#newexons) { | |
167 # update exons field | |
168 $self->{'exons'}=\@newexons; | |
169 } | |
170 return (@newexons); | |
171 } | |
172 | |
173 =head2 downstream_seq | |
174 | |
175 Title : downstream_seq | |
176 Usage : $transcript_obj->downstream_seq() | |
177 : $transcript_obj->downstream_seq(64) | |
178 Function: returns a string of nucleotides downstream of the end of the | |
179 CDS. If there is some information of the real mRNA, from features in | |
180 an attached Gene object, it will return up to those boundaries. | |
181 Otherwise it will return 1000 nucleotides. | |
182 If an argument is given it will override the default 1000 number | |
183 and return instead /that/ requested number of nucleotides. | |
184 But if a Gene object is attached, this argument will be ignored. | |
185 Returns : string | |
186 Args : an optional integer number of nucleotides to be returned instead of | |
187 the default if no gene attached | |
188 | |
189 =cut | |
190 | |
191 sub downstream_seq { | |
192 my ($self,$howmany)=@_; | |
193 my $str; | |
194 if (defined ($howmany)) { | |
195 unless ($howmany > 0) { | |
196 $self->throw("No sense in asking less than 1 downstream nucleotides!"); | |
197 } | |
198 } else { | |
199 unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve until the end | |
200 #$str=$DNAobj->labelsubseq($self->end,undef,undef,"unsecuremoderequested"); | |
201 #return(substr($str,1)); # delete first nucleotide that is the last of Transcript | |
202 if ($self->gene) { # if there is Gene object attached fetch relevant info | |
203 $str=$self->{'seq'}->labelsubseq($self->end,undef,$self->gene->maxtranscript->end); # retrieve from end of this Transcript to end of the maxtranscript | |
204 $str=substr($str,1); # delete first nucleotide that is the last of Transcript | |
205 if (CORE::length($str) > 0) { | |
206 return($str); | |
207 } else { # if there was no downstream through the gene's maxtranscript, go the usual way | |
208 $howmany = 1000; | |
209 } | |
210 } else { | |
211 $howmany = 1000; | |
212 } | |
213 } | |
214 } | |
215 my @exons=$self->all_Exons; | |
216 my $strand=$self->strand(); | |
217 my $lastexon=$exons[-1]; | |
218 my $lastexonlength=$lastexon->length; | |
219 # $howmany nucs after end of last exon | |
220 #my $downstream_seq=$lastexon->subseq($lastexonlength+1,undef,$howmany); | |
221 my $downstream_seq; | |
222 | |
223 if ($howmany) { | |
224 $downstream_seq=substr($lastexon->labelsubseq($self->end,$howmany,undef,"unsecuremoderequested"),1); | |
225 } else { | |
226 if ($strand == 1) { | |
227 $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->end,"unsecuremoderequested"),1); | |
228 } else { | |
229 $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->start,"unsecuremoderequested"),1); | |
230 } | |
231 } | |
232 return $downstream_seq; | |
233 } | |
234 | |
235 =head2 upstream_seq | |
236 | |
237 Title : upstream_seq | |
238 Usage : $transcript_obj->upstream_seq() | |
239 : $transcript_obj->upstream_seq(64) | |
240 Function: just like downstream_seq but returns nucleotides before the ATG | |
241 Note : the default, if no Gene information present and no nucleotides | |
242 number given, is to return up to 400 nucleotides. | |
243 | |
244 =cut | |
245 | |
246 sub upstream_seq { | |
247 my ($self,$howmany)=@_; | |
248 if (defined ($howmany)) { | |
249 unless ($howmany > 0) { | |
250 $self->throw("No sense in asking less than 1 upstream nucleotides!"); | |
251 } | |
252 } else { | |
253 unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve from the start | |
254 if ($self->gene) { # if there is Gene object attached fetch relevant info | |
255 my $str=$self->{'seq'}->labelsubseq($self->gene->maxtranscript->start,undef,$self->start); # retrieve from start of maxtranscript to start of this Transcript | |
256 chop $str; # delete last nucleotide that is the A of starting ATG | |
257 if (length($str) > 0) { | |
258 return($str); | |
259 } else { # if there was no upstream through the gene's maxtranscript, go the usual way | |
260 $howmany = 400; | |
261 } | |
262 } else { | |
263 $howmany = 400; | |
264 } | |
265 } | |
266 } | |
267 my @exons=$self->all_Exons; | |
268 my $firstexon=$exons[0]; | |
269 | |
270 my $upstream_seq; | |
271 my $strand=$self->strand(); | |
272 | |
273 if ($howmany) {# $howmany nucs before begin of first exon | |
274 my $labelbefore=$firstexon->label(-$howmany,$firstexon->start); | |
275 if ($labelbefore < 1) { | |
276 if ($strand == 1) { | |
277 $labelbefore=$self->{'seq'}->start; | |
278 } else { | |
279 $labelbefore=$self->{'seq'}->end; | |
280 } | |
281 } | |
282 $upstream_seq=$firstexon->labelsubseq($labelbefore,undef,$firstexon->start,"unsecuremoderequested"); | |
283 chop $upstream_seq; | |
284 } else { | |
285 if ($strand == 1) { | |
286 $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->start,undef,$self->start,"unsecuremoderequested"); | |
287 chop $upstream_seq; # delete last nucleotide that is the A of starting ATG | |
288 } else { | |
289 $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->end,undef,$self->start,"unsecuremoderequested"); | |
290 chop $upstream_seq; # delete last nucleotide that is the A of starting ATG | |
291 } | |
292 } | |
293 return $upstream_seq; | |
294 } | |
295 | |
296 # These get redefined here, overriding the SeqI one because they draw their | |
297 # information from the Exons a Transcript is built of | |
298 # optional argument: firstlabel. If not given, it checks coordinate_start | |
299 # This is useful when called by Translation | |
300 # also used by _delete | |
301 sub label { | |
302 my ($self,$position,$firstlabel)=@_; | |
303 unless ($position) { # if position = 0 complain ? | |
304 $self->warn("Position not given or position 0"); | |
305 return (-1); | |
306 } | |
307 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand()); | |
308 my ($label,@labels,$length,$arraypos); | |
309 unless (defined ($firstlabel)) { | |
310 $firstlabel=$self->coordinate_start; # this is inside Transcript obj | |
311 } | |
312 my $coord_pos=$self->_inside_position($firstlabel); | |
313 $length=$self->length; | |
314 #if ($strand == 1) { | |
315 if ($position < 1) { | |
316 $position++; # to account for missing of 0 position | |
317 } | |
318 $arraypos=$position+$coord_pos-2; | |
319 #print "\n=-=-=-=-DEBUG: arraypos $arraypos, pos $position, coordpos: $coord_pos"; | |
320 if ($arraypos < 0) { | |
321 $label=$self->{'seq'}->label($arraypos,$start,$strand); #? | |
322 } elsif ($arraypos >= $length) { | |
323 $label=$self->{'seq'}->label($arraypos-$length+2,$end,$strand); #? | |
324 } else { # inside the Transcript | |
325 @labels=$self->all_labels; | |
326 $label=$labels[$arraypos]; | |
327 } | |
328 #} | |
329 } | |
330 | |
331 # argument: label | |
332 # returns: position of label according to coord_start | |
333 # errorcode: 0 label not found | |
334 # optional argument: firstlabel. If not given, it checks coordinate_start | |
335 # This is useful when called by Translation | |
336 sub position { | |
337 my ($self,$label,$firstlabel)=@_; | |
338 unless ($self->{'seq'}->valid($label)) { | |
339 $self->warn("label is not valid"); | |
340 return (0); | |
341 } | |
342 unless (defined ($firstlabel)) { | |
343 $firstlabel=$self->coordinate_start; # this is inside Transcript obj | |
344 } | |
345 if ($label == $firstlabel) { | |
346 return (1); | |
347 } | |
348 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand()); | |
349 my ($position,$in_pos,$out_pos,$coord_pos); | |
350 my $length=$self->length; | |
351 $coord_pos=$self->_inside_position($firstlabel); | |
352 if ($self->valid($label)) { # if label is inside the Transcript | |
353 $in_pos=$self->_inside_position($label); | |
354 $position=$in_pos-$coord_pos+1; | |
355 if ($position <= 0) { | |
356 return ($position-1); # accounts for the missing of the 0 position | |
357 } | |
358 } else { | |
359 if ($self->follows($end,$label)) { # label after end of transcript | |
360 $out_pos=$self->{'seq'}->position($label,$end,$strand); | |
361 #print "\n+++++++++DEBUG label $label FOLLOWS end $end outpos $out_pos coordpos $coord_pos"; | |
362 $position=$out_pos+$length-$coord_pos; | |
363 } elsif ($self->follows($label,$start)) { # label before begin of transcript | |
364 #print "\n+++++++++DEBUG label $label BEFORE start $start outpos $out_pos coordpos $coord_pos"; | |
365 $out_pos=$self->{'seq'}->position($label,$start,$strand); | |
366 $position=$out_pos-$coord_pos+1; | |
367 } else { # label is in intron (not valid, not after, not before)! | |
368 $self->warn("Cannot give position of label pointing to intron according to CDS numbering!",1); | |
369 return (0); | |
370 } | |
371 } | |
372 return ($position); | |
373 } | |
374 | |
375 sub seq { | |
376 my $self=shift; | |
377 my ($exon,$str); | |
378 my @exons=$self->all_Exons(); | |
379 foreach $exon (@exons) { | |
380 $str .= $exon->seq(); | |
381 } | |
382 return $str; | |
383 } | |
384 | |
385 sub length { | |
386 my $self=shift; | |
387 my ($exon,$length); | |
388 my @exons=$self->all_Exons(); | |
389 foreach $exon (@exons) { | |
390 $length += $exon->length(); | |
391 } | |
392 return $length; | |
393 } | |
394 | |
395 sub all_labels { | |
396 my $self=shift; | |
397 my ($exon,@labels); | |
398 my @exons=$self->all_Exons(); | |
399 foreach $exon (@exons) { | |
400 push (@labels,$exon->all_labels()); | |
401 } | |
402 return @labels; | |
403 } | |
404 | |
405 # redefined here so that it will retrieve effective subseq without introns | |
406 # otherwise it would have retrieved an underlying DNA (possibly with introns) | |
407 # subsequence | |
408 # Drawback: this is really bulky, label->position and then a call to | |
409 # subseq that will do the opposite position-> label | |
410 # | |
411 # one day this can be rewritten as the main one so that the normal subseq | |
412 # will rely on this one and hence avoid this double (useless and lengthy) | |
413 # conversion between labels and positions | |
414 sub old_labelsubseq { | |
415 my ($self,$start,$length,$end)=@_; | |
416 my ($pos1,$pos2); | |
417 if ($start) { | |
418 unless ($self->valid($start)) { | |
419 $self->warn("Start label not valid"); return (-1); | |
420 } | |
421 $pos1=$self->position($start); | |
422 } | |
423 if ($end) { | |
424 if ($end == $start) { | |
425 $length=1; | |
426 } else { | |
427 unless ($self->valid($end)) { | |
428 $self->warn("End label not valid"); return (-1); | |
429 } | |
430 unless ($self->follows($start,$end) == 1) { | |
431 $self->warn("End label does not follow Start label!"); return (-1); | |
432 } | |
433 $pos2=$self->position($end); | |
434 undef $length; | |
435 } | |
436 } | |
437 return ($self->subseq($pos1,$pos2,$length)); | |
438 } | |
439 | |
440 # rewritten, eventually | |
441 | |
442 sub labelsubseq { | |
443 my ($self,$start,$length,$end,$unsecuremode)=@_; | |
444 unless (defined $unsecuremode && | |
445 $unsecuremode eq "unsecuremoderequested") | |
446 { # to skip security checks (faster) | |
447 if ($start) { | |
448 unless ($self->valid($start)) { | |
449 $self->warn("Start label not valid"); return (-1); | |
450 } | |
451 } else { | |
452 $start=$self->start; | |
453 } | |
454 if ($end) { | |
455 if ($end == $start) { | |
456 $length=1; | |
457 undef $end; | |
458 } else { | |
459 undef $length; # end argument overrides length argument | |
460 unless ($self->valid($end)) { | |
461 $self->warn("End label not valid"); return (-1); | |
462 } | |
463 unless ($self->follows($start,$end) == 1) { | |
464 $self->warn("End label does not follow Start label!"); return (-1); | |
465 } | |
466 } | |
467 } else { | |
468 $end=$self->end; | |
469 } | |
470 } | |
471 my ($seq,$exon,$startexon,$endexon); my @exonlabels; | |
472 my @exons=$self->all_Exons; | |
473 EXONCHECK: | |
474 foreach $exon (@exons) { | |
475 if ((!(defined($startexon)))&&($exon->valid($start))) { # checks only if not yet found | |
476 $startexon=$exon; | |
477 } | |
478 if ($exon->valid($end)) { | |
479 $endexon=$exon; | |
480 } | |
481 if ((!(defined($seq)) && (defined($startexon)))) { # initializes only once | |
482 if ((defined($endexon)) && ($endexon eq $startexon)) { # then perfect, we are finished | |
483 if ($length) { | |
484 $seq = $startexon->labelsubseq($start,$length,undef,"unsecuremoderequested"); | |
485 | |
486 | |
487 last EXONCHECK; | |
488 } else { | |
489 $seq = $startexon->labelsubseq($start,undef,$end,"unsecuremoderequested"); | |
490 } | |
491 last EXONCHECK; | |
492 } else { # get up to the end of the exon | |
493 $seq = $startexon->labelsubseq($start,undef,undef,"unsecuremoderequested"); | |
494 } | |
495 } | |
496 if (($startexon)&&($exon ne $startexon)) { | |
497 if (defined($endexon)) { # we arrived to the last exon | |
498 $seq .= $endexon->labelsubseq(undef,undef,$end,"unsecuremoderequested"); # get from the start of the exon | |
499 last EXONCHECK; | |
500 | |
501 } elsif (defined($startexon)) { # we are in a whole-exon-in-the-middle case | |
502 $seq .= $exon->seq; # we add it completely to the seq | |
503 } # else, we still have to reach the start point, exon useless, we move on | |
504 if ($length) { # if length argument specified | |
505 if (($seq && (CORE::length($seq) >= $length))) { | |
506 last EXONCHECK; | |
507 } | |
508 } | |
509 } | |
510 } | |
511 if ($length) { | |
512 return (substr($seq,0,$length)); | |
513 } else { | |
514 return ($seq); | |
515 } | |
516 } | |
517 | |
518 | |
519 # argument: label | |
520 # returns: the objref and progressive number of the Exon containing that label | |
521 # errorcode: -1 | |
522 sub in_which_Exon { | |
523 my ($self,$label)=@_; | |
524 my ($count,$exon); | |
525 my @exons=$self->all_Exons; | |
526 foreach $exon (@exons) { | |
527 $count++; # 1st exon is numbered "1" | |
528 if ($exon->valid($label)) { | |
529 return ($exon,$count) | |
530 } | |
531 } | |
532 return (-1); # if nothing found | |
533 } | |
534 | |
535 # recoded to exploit the new fast labelsubseq() | |
536 # valid only inside Transcript | |
537 sub subseq { | |
538 my ($self,$pos1,$pos2,$length) = @_; | |
539 my ($str,$startlabel,$endlabel); | |
540 if (defined ($pos1)) { | |
541 if ($pos1 == 0) { # if position = 0 complain | |
542 $self->warn("Position cannot be 0!"); return (-1); | |
543 } | |
544 if ((defined ($pos2))&&($pos1>$pos2)) { | |
545 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
546 } | |
547 $startlabel=$self->label($pos1); | |
548 unless ($self->valid($startlabel)) { | |
549 $self->warn("Start label not valid"); return (-1); | |
550 } | |
551 if ($startlabel < 1) { | |
552 $self->warn("position $pos1 not valid as start of subseq!"); return (-1); | |
553 } | |
554 } else { | |
555 $startlabel=$self->start; | |
556 } | |
557 if (defined ($pos2)) { | |
558 if ($pos2 == 0) { # if position = 0 complain | |
559 $self->warn("Position cannot be 0!"); return (-1); | |
560 } | |
561 undef $length; | |
562 if ((defined ($pos1))&&($pos1>$pos2)) { | |
563 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1); | |
564 } | |
565 $endlabel=$self->label($pos2); | |
566 unless ($self->valid($endlabel)) { | |
567 $self->warn("End label not valid"); return (-1); | |
568 } | |
569 if ($endlabel < 1) { | |
570 $self->warn("position $pos2 not valid as end of subseq!"); return (-1); | |
571 } | |
572 } else { | |
573 unless (defined ($length)) { | |
574 $endlabel=$self->end; | |
575 } | |
576 } | |
577 return ($self->labelsubseq($startlabel,$length,$endlabel,"unsecuremoderequested")); | |
578 } | |
579 | |
580 # works only inside the transcript, complains if asked outside | |
581 sub old_subseq { | |
582 my ($self,$pos1,$pos2,$length) = @_; | |
583 my ($str,$startcount,$endcount,$seq,$seqlength); | |
584 if (defined ($length)) { | |
585 if ($length < 1) { | |
586 $self->warn("No sense asking for a subseq of length < 1"); | |
587 return (-1); | |
588 } | |
589 } | |
590 my $firstlabel=$self->coordinate_start; # this is inside Transcript obj | |
591 my $coord_pos=$self->_inside_position($firstlabel); # TESTME old | |
592 $seq=$self->seq; | |
593 $seqlength=CORE::length($seq); | |
594 unless (defined ($pos1)) { | |
595 $startcount=1+$coord_pos-1; # i.e. coord_pos | |
596 } else { | |
597 if ($pos1 == 0) { # if position = 0 complain | |
598 $self->warn("Position cannot be 0!"); return (-1); | |
599 } elsif ($pos1 < 0) { | |
600 $pos1++; | |
601 } | |
602 if ((defined ($pos2))&&($pos1>$pos2)) { | |
603 $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!"); | |
604 return (-1); | |
605 } | |
606 $startcount=$pos1+$coord_pos-1; | |
607 } | |
608 unless (defined ($pos2)) { | |
609 ; | |
610 } else { | |
611 if ($pos2 == 0) { # if position = 0 complain | |
612 $self->warn("Position cannot be 0!"); return (-1); | |
613 } elsif ($pos2 < 0) { | |
614 $pos2++; | |
615 } | |
616 if ((defined ($pos1))&&($pos1>$pos2)) { | |
617 $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!"); | |
618 return (-1); | |
619 } | |
620 $endcount=$pos2+$coord_pos-1; | |
621 if ($endcount > $seqlength) { | |
622 #print "\n###DEBUG###: pos1 $pos1 pos2 $pos2 coordpos $coord_pos endcount $endcount seqln $seqlength\n"; | |
623 $self->warn("Cannot access end position after the end of Transcript"); | |
624 return (-1); | |
625 } | |
626 $length=$endcount-$startcount+1; | |
627 } | |
628 #print "\n###DEBUG pos1 $pos1 pos2 $pos2 start $startcount end $endcount length $length coordpos $coord_pos\n"; | |
629 my $offset=$startcount-1; | |
630 if ($offset < 0) { | |
631 $self->warn("Cannot access startposition before the beginning of Transcript, returning from start",1); # ignorable | |
632 return (substr($seq,0,$length)); | |
633 } elsif ($offset >= $seqlength) { | |
634 $self->warn("Cannot access startposition after the end of Transcript"); | |
635 return (-1); | |
636 } else { | |
637 $str=substr($seq,$offset,$length); | |
638 if (CORE::length($str) < $length) { | |
639 $self->warn("Attention, cannot return the length requested ". | |
640 "for subseq",1) if $self->verbose > 0; # ignorable | |
641 } | |
642 return $str; | |
643 } | |
644 } | |
645 | |
646 # redefined so that it doesn't require other methods (after deletions) to | |
647 # reset it. | |
648 sub start { | |
649 my $self = shift; | |
650 my $exonsref=$self->{'exons'}; | |
651 my @exons=@{$exonsref}; | |
652 return ($exons[0]->start); | |
653 } | |
654 | |
655 sub end { | |
656 my $self = shift; | |
657 my $exonsref=$self->{'exons'}; | |
658 my @exons=@{$exonsref}; | |
659 return ($exons[-1]->end); | |
660 } | |
661 | |
662 | |
663 # internal methods begin here | |
664 | |
665 # returns: position of label in transcript's all_labels | |
666 # with STARTlabel == 1 | |
667 # errorcode 0 -> label not found | |
668 # argument: label | |
669 sub _inside_position { | |
670 my ($self,$label)=@_; | |
671 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand()); | |
672 my ($position,$checkme); | |
673 my @labels=$self->all_labels; | |
674 foreach $checkme (@labels) { | |
675 $position++; | |
676 if ($label == $checkme) { | |
677 return ($position); | |
678 } | |
679 } | |
680 return (0); | |
681 } | |
682 | |
683 # returns 1 OK or 0 ERROR | |
684 # arguments: reference to array of Exon object references | |
685 sub _checkexons { | |
686 my ($exon,$thisstart); | |
687 my $self=$exon; | |
688 my $exonsref=$_[0]; | |
689 my @exons=@{$exonsref}; | |
690 | |
691 my $firstexon = $exons[0]; | |
692 | |
693 unless (ref($firstexon) eq "Bio::LiveSeq::Exon") { | |
694 $self->warn("Object not of class Exon"); | |
695 return (0); | |
696 } | |
697 my $strand = $firstexon->strand; | |
698 | |
699 my $prevend = $firstexon->end; | |
700 shift @exons; # skip first one | |
701 foreach $exon (@exons) { | |
702 unless (ref($exon) eq "Bio::LiveSeq::Exon") { # object class check | |
703 $self->warn("Object not of class Exon"); | |
704 return (0); | |
705 } | |
706 if ($exon->strand != $strand) { # strand consistency check | |
707 $self->warn("Exons' strands not consistent when trying to create Transcript"); | |
708 return (0); | |
709 } | |
710 $thisstart = $exon->start; | |
711 unless ($exon->{'seq'}->follows($prevend,$thisstart,$strand)) { | |
712 $self->warn("Exons not in correct order when trying to create Transcript"); | |
713 return (0); | |
714 } | |
715 $prevend = $exon->end; | |
716 } | |
717 return (1); | |
718 } | |
719 | |
720 =head2 get_Translation | |
721 | |
722 Title : valid | |
723 Usage : $translation = $obj->get_Translation() | |
724 Function: retrieves the reference to the object of class Translation (if any) | |
725 attached to a LiveSeq object | |
726 Returns : object reference | |
727 Args : none | |
728 | |
729 =cut | |
730 | |
731 sub get_Translation { | |
732 my $self=shift; | |
733 return ($self->{'translation'}); # this is set when Translation->new is called | |
734 } | |
735 | |
736 # this checks so that deletion spanning multiple exons is | |
737 # handled accordingly and correctly | |
738 # arguments: begin and end label of a deletion | |
739 # this is called BEFORE any deletion in the chain | |
740 sub _deletecheck { | |
741 my ($self,$startlabel,$endlabel)=@_; | |
742 my $exonsref=$self->{'exons'}; | |
743 my @exons=@{$exonsref}; | |
744 my ($startexon,$endexon,$exon); | |
745 $startexon=$endexon=0; | |
746 foreach $exon (@exons) { | |
747 if (($startexon == 0)&&($exon->valid($startlabel))) { | |
748 $startexon=$exon; # exon containing start of deletion | |
749 } | |
750 if (($endexon == 0)&&($exon->valid($endlabel))) { | |
751 $endexon=$exon; # exon containing end of deletion | |
752 } | |
753 if (($startexon)&&($endexon)) { | |
754 last; # don't check further | |
755 } | |
756 } | |
757 my $nextend=$self->label(2,$endlabel); # retrieve the next label | |
758 my $prevstart=$self->label(-1,$startlabel); # retrieve the prev label | |
759 | |
760 if ($startexon eq $endexon) { # intra-exon deletion | |
761 if (($startexon->start eq $startlabel) && ($startexon->end eq $endlabel)) { | |
762 # let's delete the entire exon | |
763 my @newexons; | |
764 foreach $exon (@exons) { | |
765 unless ($exon eq $startexon) { | |
766 push(@newexons,$exon); | |
767 } | |
768 } | |
769 $self->{'exons'}=\@newexons; | |
770 } elsif ($startexon->start eq $startlabel) { # special cases | |
771 $startexon->{'start'}=$nextend; # set a new start of exon | |
772 } elsif ($startexon->end eq $endlabel) { | |
773 $startexon->{'end'}=$prevstart; # set a new end of exon | |
774 } else { | |
775 return; # no problem | |
776 } | |
777 } else { # two new exons to be created, inter-exons deletion | |
778 my @newexons; | |
779 my $exonobj; | |
780 my $dna=$self->{'seq'}; | |
781 my $strand=$self->strand; | |
782 my $notmiddle=1; # flag for skipping exons in the middle of deletion | |
783 foreach $exon (@exons) { | |
784 if ($exon eq $startexon) { | |
785 $exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$exon->start,'-end'=>$prevstart,'-strand'=>$strand); # new partial exon | |
786 push(@newexons,$exonobj); | |
787 $notmiddle=0; # now we enter totally deleted exons | |
788 } elsif ($exon eq $endexon) { | |
789 $exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$nextend,'-end'=>$exon->end,'-strand'=>$strand); # new partial exon | |
790 push(@newexons,$exonobj); | |
791 $notmiddle=1; # exiting totally deleted exons | |
792 } else { | |
793 if ($notmiddle) { # if before or after exons with deletion | |
794 push(@newexons,$exon); | |
795 }# else skip them | |
796 } | |
797 } | |
798 $self->{'exons'}=\@newexons; | |
799 } | |
800 } | |
801 | |
802 =head2 translation_table | |
803 | |
804 Title : translation_table | |
805 Usage : $name = $obj->translation_table; | |
806 : $name = $obj->translation_table(11); | |
807 Function: Returns or sets the translation_table used for translating the | |
808 transcript. | |
809 If it has never been set, it will return undef. | |
810 Returns : an integer | |
811 | |
812 =cut | |
813 | |
814 sub translation_table { | |
815 my ($self,$value) = @_; | |
816 if (defined $value) { | |
817 $self->{'translation_table'} = $value; | |
818 } | |
819 unless (exists $self->{'translation_table'}) { | |
820 return (undef); | |
821 } else { | |
822 return $self->{'translation_table'}; | |
823 } | |
824 } | |
825 | |
826 =head2 frame | |
827 | |
828 Title : frame | |
829 Usage : $frame = $transcript->frame($label); | |
830 Function: Returns the frame of a particular nucleotide. | |
831 Frame can be 0 1 or 2 and means the position in the codon triplet | |
832 of the particulat nucleotide. 0 is the first codon_position. | |
833 Codon_position (1 2 3) is simply frame+1. | |
834 If the label asked for is not inside the Transcript, -1 will be | |
835 returned. | |
836 Args : a label | |
837 Returns : 0 1 or 2 | |
838 Errorcode -1 | |
839 | |
840 =cut | |
841 | |
842 # args: label | |
843 # returns: frame of nucleotide (0 1 2) | |
844 # errorcode: -1 | |
845 sub frame { | |
846 my ($self,$inputlabel)=@_; | |
847 my @labels=$self->all_labels; | |
848 my ($label,$frame,$count); | |
849 foreach $label (@labels) { | |
850 if ($inputlabel == $label) { | |
851 return ($count % 3); | |
852 } | |
853 $count++; # 0 1 2 3 4.... | |
854 } | |
855 return (-1); # label not found amid Transcript labels | |
856 } | |
857 | |
858 1; |