Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Compara/Member.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 package Bio::EnsEMBL::Compara::Member; | |
2 | |
3 use strict; | |
4 use Bio::Seq; | |
5 use Bio::EnsEMBL::Utils::Argument; | |
6 use Bio::EnsEMBL::Utils::Exception; | |
7 use Bio::EnsEMBL::Gene; | |
8 use Bio::EnsEMBL::Compara::GenomeDB; | |
9 | |
10 | |
11 =head2 new (CONSTRUCTOR) | |
12 | |
13 Arg [-DBID] : (opt) | |
14 : int $dbID (the database internal ID for this object) | |
15 Arg [-ADAPTOR] | |
16 : Bio::EnsEMBL::Compara::DBSQL::Member $adaptor | |
17 (the adaptor for connecting to the database) | |
18 Arg [-DESCRIPTION] (opt) | |
19 : string $description | |
20 Arg [-SOURCE_NAME] (opt) | |
21 : string $source_name | |
22 (e.g., "ENSEMBLGENE", "ENSEMBLPEP", "Uniprot/SWISSPROT", "Uniprot/SPTREMBL") | |
23 Arg [-TAXON_ID] (opt) | |
24 : int $taxon_id | |
25 (NCBI taxonomy id for the member) | |
26 Arg [-GENOME_DB_ID] (opt) | |
27 : int $genome_db_id | |
28 (the $genome_db->dbID for a species in the database) | |
29 Arg [-SEQUENCE_ID] (opt) | |
30 : int $sequence_id | |
31 (the $sequence_id for the sequence table in the database) | |
32 Example : | |
33 my $member = new Bio::EnsEMBL::Compara::Member; | |
34 Description: Creates a new Member object | |
35 Returntype : Bio::EnsEMBL::Compara::Member | |
36 Exceptions : none | |
37 Caller : general | |
38 Status : Stable | |
39 | |
40 =cut | |
41 | |
42 sub new { | |
43 my ($class, @args) = @_; | |
44 | |
45 my $self = bless {}, $class; | |
46 | |
47 if (scalar @args) { | |
48 #do this explicitly. | |
49 my ($dbid, $stable_id, $description, $source_name, $adaptor, $taxon_id, $genome_db_id, $sequence_id) = rearrange([qw(DBID STABLE_ID DESCRIPTION SOURCE_NAME ADAPTOR TAXON_ID GENOME_DB_ID SEQUENCE_ID)], @args); | |
50 | |
51 $dbid && $self->dbID($dbid); | |
52 $stable_id && $self->stable_id($stable_id); | |
53 $description && $self->description($description); | |
54 $source_name && $self->source_name($source_name); | |
55 $adaptor && $self->adaptor($adaptor); | |
56 $taxon_id && $self->taxon_id($taxon_id); | |
57 $genome_db_id && $self->genome_db_id($genome_db_id); | |
58 $sequence_id && $self->sequence_id($sequence_id); | |
59 } | |
60 | |
61 return $self; | |
62 } | |
63 | |
64 | |
65 =head2 copy | |
66 | |
67 Arg [1] : object $parent_object (optional) | |
68 Example : | |
69 Description: copies the object, optionally by topping up a given structure (to support multiple inheritance) | |
70 Returntype : | |
71 Exceptions : | |
72 Caller : | |
73 | |
74 =cut | |
75 | |
76 sub copy { | |
77 my $self = shift; | |
78 | |
79 my $mycopy = @_ ? shift : {}; | |
80 bless $mycopy, 'Bio::EnsEMBL::Compara::Member'; | |
81 | |
82 $mycopy->dbID($self->dbID); | |
83 $mycopy->stable_id($self->stable_id); | |
84 $mycopy->version($self->version); | |
85 $mycopy->description($self->description); | |
86 $mycopy->source_name($self->source_name); | |
87 #$mycopy->adaptor($self->adaptor); | |
88 $mycopy->chr_name($self->chr_name); | |
89 $mycopy->chr_start($self->chr_start); | |
90 $mycopy->chr_end($self->chr_end); | |
91 $mycopy->chr_strand($self->chr_strand); | |
92 $mycopy->taxon_id($self->taxon_id); | |
93 $mycopy->genome_db_id($self->genome_db_id); | |
94 $mycopy->sequence_id($self->sequence_id); | |
95 $mycopy->gene_member_id($self->gene_member_id); | |
96 $mycopy->display_label($self->display_label); | |
97 | |
98 return $mycopy; | |
99 } | |
100 | |
101 | |
102 =head2 new_fast | |
103 | |
104 Arg [1] : hash reference $hashref | |
105 Example : none | |
106 Description: This is an ultra fast constructor which requires knowledge of | |
107 the objects internals to be used. | |
108 Returntype : | |
109 Exceptions : none | |
110 Caller : | |
111 | |
112 =cut | |
113 | |
114 sub new_fast { | |
115 my ($class, $hashref) = @_; | |
116 | |
117 return bless $hashref, $class; | |
118 } | |
119 | |
120 =head2 new_from_gene | |
121 | |
122 Args : Requires both an Bio::Ensembl:Gene object and a | |
123 : Bio::Ensembl:Compara:GenomeDB object | |
124 Example : $member = Bio::EnsEMBL::Compara::Member->new_from_gene( | |
125 -gene => $gene, | |
126 -genome_db => $genome_db); | |
127 Description: contructor method which takes an Ensembl::Gene object | |
128 and Compara:GenomeDB object and creates a new Member object | |
129 translating from the Gene object | |
130 Returntype : Bio::Ensembl::Compara::Member | |
131 Exceptions : | |
132 Caller : | |
133 | |
134 =cut | |
135 | |
136 sub new_from_gene { | |
137 my ($class, @args) = @_; | |
138 my $self = $class->new(@args); | |
139 | |
140 if (scalar @args) { | |
141 | |
142 my ($gene, $genome_db) = rearrange([qw(GENE GENOME_DB)], @args); | |
143 | |
144 unless(defined($gene) and $gene->isa('Bio::EnsEMBL::Gene')) { | |
145 throw( | |
146 "gene arg must be a [Bio::EnsEMBL::Gene] ". | |
147 "not a [$gene]"); | |
148 } | |
149 unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) { | |
150 throw( | |
151 "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ". | |
152 "not a [$genome_db]"); | |
153 } | |
154 unless (defined $gene->stable_id) { | |
155 throw("COREDB error: does not contain gene_stable_id for gene_id ". $gene->dbID."\n"); | |
156 } | |
157 | |
158 $self->stable_id($gene->stable_id); | |
159 $self->taxon_id($genome_db->taxon_id); | |
160 $self->description($gene->description); | |
161 $self->genome_db_id($genome_db->dbID); | |
162 $self->chr_name($gene->seq_region_name); | |
163 $self->chr_start($gene->seq_region_start); | |
164 $self->chr_end($gene->seq_region_end); | |
165 $self->chr_strand($gene->seq_region_strand); | |
166 $self->source_name("ENSEMBLGENE"); | |
167 $self->version($gene->version); | |
168 } | |
169 return $self; | |
170 } | |
171 | |
172 | |
173 =head2 new_from_transcript | |
174 | |
175 Arg[1] : Bio::Ensembl:Transcript object | |
176 Arg[2] : Bio::Ensembl:Compara:GenomeDB object | |
177 Arg[3] : string where value='translate' causes transcript object to translate | |
178 to a peptide | |
179 Example : $member = Bio::EnsEMBL::Compara::Member->new_from_transcript( | |
180 $transcript, $genome_db, | |
181 -translate); | |
182 Description: contructor method which takes an Ensembl::Gene object | |
183 and Compara:GenomeDB object and creates a new Member object | |
184 translating from the Gene object | |
185 Returntype : Bio::Ensembl::Compara::Member | |
186 Exceptions : | |
187 Caller : | |
188 | |
189 =cut | |
190 | |
191 sub new_from_transcript { | |
192 my ($class, @args) = @_; | |
193 my $self = $class->new(@args); | |
194 my $peptideBioSeq; | |
195 my $seq_string; | |
196 | |
197 my ($transcript, $genome_db, $translate, $description) = rearrange([qw(TRANSCRIPT GENOME_DB TRANSLATE DESCRIPTION)], @args); | |
198 #my ($transcript, $genome_db, $translate) = @args; | |
199 | |
200 unless(defined($transcript) and $transcript->isa('Bio::EnsEMBL::Transcript')) { | |
201 throw( | |
202 "transcript arg must be a [Bio::EnsEMBL::Transcript]". | |
203 "not a [$transcript]"); | |
204 } | |
205 unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) { | |
206 throw( | |
207 "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ". | |
208 "not a [$genome_db]"); | |
209 } | |
210 $self->taxon_id($genome_db->taxon_id); | |
211 if(defined($description)) { $self->description($description); } | |
212 else { $self->description("NULL"); } | |
213 $self->genome_db_id($genome_db->dbID); | |
214 $self->chr_name($transcript->seq_region_name); | |
215 $self->chr_start($transcript->coding_region_start); | |
216 $self->chr_end($transcript->coding_region_end); | |
217 $self->chr_strand($transcript->seq_region_strand); | |
218 $self->version($transcript->translation->version) if ($translate eq 'yes'); | |
219 | |
220 if(($translate eq 'translate') or ($translate eq 'yes')) { | |
221 if(not defined($transcript->translation)) { | |
222 throw("request to translate a transcript without a defined translation", | |
223 $transcript->stable_id); | |
224 } | |
225 unless (defined $transcript->translation->stable_id) { | |
226 throw("COREDB error: does not contain translation stable id for translation_id ".$transcript->translation->dbID."\n"); | |
227 } | |
228 | |
229 $self->stable_id($transcript->translation->stable_id); | |
230 $self->source_name("ENSEMBLPEP"); | |
231 | |
232 unless ($peptideBioSeq = $transcript->translate) { | |
233 throw("COREDB error: unable to get a BioSeq translation from ". $transcript->stable_id); | |
234 } | |
235 eval { | |
236 $seq_string = $peptideBioSeq->seq; | |
237 }; | |
238 throw "COREDB error: can't get seq from peptideBioSeq" if $@; | |
239 # OR | |
240 #$seq_string = $transcript->translation->seq; | |
241 | |
242 if ($seq_string =~ /^X+$/) { | |
243 warn("X+ in sequence from translation " . $transcript->translation->stable_id."\n"); | |
244 } | |
245 elsif (length($seq_string) == 0) { | |
246 warn("zero length sequence from translation " . $transcript->translation->stable_id."\n"); | |
247 } | |
248 else { | |
249 #$seq_string =~ s/(.{72})/$1\n/g; | |
250 $self->sequence($seq_string); | |
251 } | |
252 } elsif ($translate eq 'ncrna') { | |
253 unless (defined $transcript->stable_id) { | |
254 throw("COREDB error: does not contain transcript stable id for transcript_id ".$transcript->dbID."\n"); | |
255 } | |
256 $self->stable_id($transcript->stable_id); | |
257 $self->source_name("ENSEMBLTRANS"); | |
258 | |
259 unless ($seq_string = $transcript->spliced_seq) { | |
260 throw("COREDB error: unable to get a BioSeq spliced_seq from ". $transcript->stable_id); | |
261 } | |
262 if (length($seq_string) == 0) { | |
263 warn("zero length sequence from transcript " . $transcript->stable_id."\n"); | |
264 } | |
265 $self->sequence($seq_string); | |
266 } | |
267 | |
268 #print("Member->new_from_transcript\n"); | |
269 #print(" source_name = '" . $self->source_name . "'\n"); | |
270 #print(" stable_id = '" . $self->stable_id . "'\n"); | |
271 #print(" taxon_id = '" . $self->taxon_id . "'\n"); | |
272 #print(" chr_name = '" . $self->chr_name . "'\n"); | |
273 return $self; | |
274 } | |
275 | |
276 | |
277 =head2 member_id | |
278 | |
279 Arg [1] : int $member_id (optional) | |
280 Example : | |
281 Description: | |
282 Returntype : | |
283 Exceptions : | |
284 Caller : | |
285 | |
286 =cut | |
287 | |
288 sub member_id { | |
289 my $self = shift; | |
290 return $self->dbID(@_); | |
291 } | |
292 | |
293 | |
294 =head2 dbID | |
295 | |
296 Arg [1] : int $dbID (optional) | |
297 Example : | |
298 Description: | |
299 Returntype : | |
300 Exceptions : | |
301 Caller : | |
302 | |
303 =cut | |
304 | |
305 sub dbID { | |
306 my $self = shift; | |
307 $self->{'_dbID'} = shift if(@_); | |
308 return $self->{'_dbID'}; | |
309 } | |
310 | |
311 =head2 stable_id | |
312 | |
313 Arg [1] : string $stable_id (optional) | |
314 Example : | |
315 Description: | |
316 Returntype : | |
317 Exceptions : | |
318 Caller : | |
319 | |
320 =cut | |
321 | |
322 sub stable_id { | |
323 my $self = shift; | |
324 $self->{'_stable_id'} = shift if(@_); | |
325 return $self->{'_stable_id'}; | |
326 } | |
327 | |
328 =head2 display_label | |
329 | |
330 Arg [1] : string $display_label (optional) | |
331 Example : | |
332 Description: | |
333 Returntype : | |
334 Exceptions : | |
335 Caller : | |
336 | |
337 =cut | |
338 | |
339 sub display_label { | |
340 my $self = shift; | |
341 $self->{'_display_label'} = shift if(@_); | |
342 return $self->{'_display_label'}; | |
343 } | |
344 | |
345 =head2 version | |
346 | |
347 Arg [1] : | |
348 Example : | |
349 Description: | |
350 Returntype : | |
351 Exceptions : | |
352 Caller : | |
353 | |
354 =cut | |
355 | |
356 sub version { | |
357 my $self = shift; | |
358 $self->{'_version'} = shift if(@_); | |
359 $self->{'_version'} = 0 unless(defined($self->{'_version'})); | |
360 return $self->{'_version'}; | |
361 } | |
362 | |
363 =head2 description | |
364 | |
365 Arg [1] : string $description (optional) | |
366 Example : | |
367 Description: | |
368 Returntype : string | |
369 Exceptions : | |
370 Caller : | |
371 | |
372 =cut | |
373 | |
374 sub description { | |
375 my $self = shift; | |
376 $self->{'_description'} = shift if(@_); | |
377 return $self->{'_description'}; | |
378 } | |
379 | |
380 =head2 source_name | |
381 | |
382 =cut | |
383 | |
384 sub source_name { | |
385 my $self = shift; | |
386 $self->{'_source_name'} = shift if (@_); | |
387 return $self->{'_source_name'}; | |
388 } | |
389 | |
390 =head2 adaptor | |
391 | |
392 Arg [1] : string $adaptor (optional) | |
393 corresponding to a perl module | |
394 Example : | |
395 Description: | |
396 Returntype : | |
397 Exceptions : | |
398 Caller : | |
399 | |
400 =cut | |
401 | |
402 sub adaptor { | |
403 my $self = shift; | |
404 $self->{'_adaptor'} = shift if(@_); | |
405 return $self->{'_adaptor'}; | |
406 } | |
407 | |
408 =head2 chr_name | |
409 | |
410 =cut | |
411 | |
412 sub chr_name { | |
413 my $self = shift; | |
414 $self->{'_chr_name'} = shift if (@_); | |
415 return $self->{'_chr_name'}; | |
416 } | |
417 | |
418 =head2 chr_start | |
419 | |
420 =cut | |
421 | |
422 sub chr_start { | |
423 my $self = shift; | |
424 $self->{'_chr_start'} = shift if (@_); | |
425 return $self->{'_chr_start'}; | |
426 } | |
427 | |
428 =head2 chr_end | |
429 | |
430 =cut | |
431 | |
432 sub chr_end { | |
433 my $self = shift; | |
434 $self->{'_chr_end'} = shift if (@_); | |
435 return $self->{'_chr_end'}; | |
436 } | |
437 | |
438 =head2 chr_strand | |
439 | |
440 Arg [1] : integer | |
441 Description: Returns the strand of the member. Defined strands are 1 or -1. | |
442 0 is undefined strand. | |
443 Returntype : 1,0,-1 | |
444 Exceptions : none | |
445 Caller : general | |
446 | |
447 =cut | |
448 | |
449 sub chr_strand { | |
450 my $self = shift; | |
451 $self->{'_chr_strand'} = shift if (@_); | |
452 $self->{'_chr_strand'}='0' unless(defined($self->{'_chr_strand'})); | |
453 return $self->{'_chr_strand'}; | |
454 } | |
455 | |
456 =head2 taxon_id | |
457 | |
458 =cut | |
459 | |
460 sub taxon_id { | |
461 my $self = shift; | |
462 $self->{'_taxon_id'} = shift if (@_); | |
463 return $self->{'_taxon_id'}; | |
464 } | |
465 | |
466 =head2 taxon | |
467 | |
468 =cut | |
469 | |
470 sub taxon { | |
471 my $self = shift; | |
472 | |
473 if (@_) { | |
474 my $taxon = shift; | |
475 unless ($taxon->isa('Bio::EnsEMBL::Compara::NCBITaxon')) { | |
476 throw( | |
477 "taxon arg must be a [Bio::EnsEMBL::Compara::NCBITaxon". | |
478 "not a [$taxon]"); | |
479 } | |
480 $self->{'_taxon'} = $taxon; | |
481 $self->taxon_id($taxon->ncbi_taxid); | |
482 } else { | |
483 unless (defined $self->{'_taxon'}) { | |
484 unless (defined $self->taxon_id) { | |
485 throw("can't fetch Taxon without a taxon_id"); | |
486 } | |
487 my $NCBITaxonAdaptor = $self->adaptor->db->get_NCBITaxonAdaptor; | |
488 $self->{'_taxon'} = $NCBITaxonAdaptor->fetch_node_by_taxon_id($self->taxon_id); | |
489 } | |
490 } | |
491 | |
492 return $self->{'_taxon'}; | |
493 } | |
494 | |
495 =head2 genome_db_id | |
496 | |
497 =cut | |
498 | |
499 sub genome_db_id { | |
500 my $self = shift; | |
501 $self->{'_genome_db_id'} = shift if (@_); | |
502 return $self->{'_genome_db_id'}; | |
503 } | |
504 | |
505 =head2 genome_db | |
506 | |
507 =cut | |
508 | |
509 sub genome_db { | |
510 my $self = shift; | |
511 | |
512 if (@_) { | |
513 my $genome_db = shift; | |
514 unless ($genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) { | |
515 throw( | |
516 "arg must be a [Bio::EnsEMBL::Compara::GenomeDB". | |
517 "not a [$genome_db]"); | |
518 } | |
519 $self->{'_genome_db'} = $genome_db; | |
520 $self->genome_db_id($genome_db->dbID); | |
521 } else { | |
522 unless (defined $self->{'_genome_db'}) { | |
523 unless (defined $self->genome_db_id and defined $self->adaptor) { | |
524 throw("can't fetch GenomeDB without an adaptor and genome_db_id"); | |
525 } | |
526 my $GenomeDBAdaptor = $self->adaptor->db->get_GenomeDBAdaptor; | |
527 $self->{'_genome_db'} = $GenomeDBAdaptor->fetch_by_dbID($self->genome_db_id); | |
528 } | |
529 } | |
530 | |
531 return $self->{'_genome_db'}; | |
532 } | |
533 | |
534 =head2 sequence | |
535 | |
536 Arg [1] : string $sequence | |
537 Example : my $seq = $member->sequence; | |
538 Description: Get/set the sequence string of this member | |
539 Will lazy load by sequence_id if needed and able | |
540 Returntype : string | |
541 Exceptions : none | |
542 Caller : general | |
543 | |
544 =cut | |
545 | |
546 sub sequence { | |
547 my $self = shift; | |
548 | |
549 if(@_) { | |
550 $self->{'_seq_length'} = undef; | |
551 $self->{'_sequence'} = shift; | |
552 $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'})); | |
553 return $self->{'_sequence'}; | |
554 } | |
555 | |
556 if(!defined($self->{'_sequence'}) and | |
557 defined($self->sequence_id()) and | |
558 defined($self->adaptor)) | |
559 { | |
560 $self->{'_sequence'} = $self->adaptor->db->get_SequenceAdaptor->fetch_by_dbID($self->sequence_id); | |
561 $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'})); | |
562 } | |
563 | |
564 return $self->{'_sequence'}; | |
565 } | |
566 | |
567 =head2 sequence_exon_cased | |
568 | |
569 Args : none | |
570 Example : my $sequence_exon_cased = $member->sequence_exon_cased; | |
571 | |
572 Description: Get/set the sequence string of this peptide member with | |
573 alternating upper and lower case corresponding to the translateable exons. | |
574 Returntype : string | |
575 Exceptions : none | |
576 Caller : general | |
577 | |
578 =cut | |
579 | |
580 sub sequence_exon_cased { | |
581 my $self = shift; | |
582 | |
583 my $sequence = $self->sequence; | |
584 my $trans = $self->get_Transcript; | |
585 my @exons = @{$trans->get_all_translateable_Exons}; | |
586 return $sequence if (1 == scalar @exons); | |
587 | |
588 my %splice_site; | |
589 my $pep_len = 0; | |
590 my $overlap_len = 0; | |
591 while (my $exon = shift @exons) { | |
592 my $exon_len = $exon->length; | |
593 my $pep_seq = $exon->peptide($trans)->length; | |
594 # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq | |
595 $pep_seq -= 1 if ($overlap_len); | |
596 $pep_len += $pep_seq; | |
597 if ($overlap_len = (($exon_len + $overlap_len ) %3)){ # if there is an overlap | |
598 $splice_site{$pep_len-1}{'overlap'} = $pep_len -1; # stores overlapping aa-exon boundary | |
599 } else { | |
600 $overlap_len = 0; | |
601 } | |
602 $splice_site{$pep_len}{'phase'} = $overlap_len; # positions of exon boundary | |
603 } | |
604 | |
605 my @exon_sequences = (); | |
606 foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end | |
607 next if (defined($splice_site{$pep_len}{'overlap'})); | |
608 next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein | |
609 my $length = $pep_len; | |
610 $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'}); | |
611 my $peptide; | |
612 $peptide = substr($sequence,$length,length($sequence),''); | |
613 unshift(@exon_sequences, $peptide); | |
614 } | |
615 unshift(@exon_sequences, $sequence); # First exon (last piece of sequence left) | |
616 | |
617 my $splice = 1; | |
618 foreach my $exon_sequence (@exon_sequences) { | |
619 if ($splice % 2 == 0) { | |
620 $exon_sequence = lc($exon_sequence); | |
621 } | |
622 $splice++; | |
623 } | |
624 my $seqsplice = join("", @exon_sequences); | |
625 | |
626 return $seqsplice; | |
627 } | |
628 | |
629 sub sequence_exon_bounded { | |
630 my $self = shift; | |
631 | |
632 if(@_) { | |
633 $self->{'_sequence_exon_bounded'} = shift; | |
634 return $self->{'_sequence_exon_bounded'}; | |
635 } | |
636 | |
637 if(!defined($self->{'_sequence_exon_bounded'})) { | |
638 $self->{'_sequence_exon_bounded'} = $self->adaptor->db->get_SequenceAdaptor->fetch_sequence_exon_bounded_by_member_id($self->member_id); | |
639 } | |
640 | |
641 if(!defined($self->{'_sequence_exon_bounded'})) { | |
642 $self->{'_sequence_exon_bounded'} = $self->_compose_sequence_exon_bounded; | |
643 } | |
644 | |
645 return $self->{'_sequence_exon_bounded'}; | |
646 } | |
647 | |
648 | |
649 sub _compose_sequence_exon_bounded { | |
650 my $self = shift; | |
651 | |
652 my $sequence = $self->sequence; | |
653 my $trans = $self->get_Transcript; | |
654 my @exons = @{$trans->get_all_translateable_Exons}; | |
655 return $sequence if (1 == scalar @exons); | |
656 | |
657 my %splice_site; | |
658 my $pep_len = 0; | |
659 my $overlap_len = 0; | |
660 while (my $exon = shift @exons) { | |
661 my $exon_len = $exon->length; | |
662 my $pep_seq = $exon->peptide($trans)->length; | |
663 # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq | |
664 $pep_seq -= 1 if ($overlap_len); | |
665 $pep_len += $pep_seq; | |
666 if ($overlap_len = (($exon_len + $overlap_len ) %3)){ # if there is an overlap | |
667 $splice_site{$pep_len-1}{'overlap'} = $pep_len -1; # stores overlapping aa-exon boundary | |
668 } else { | |
669 $overlap_len = 0; | |
670 } | |
671 $splice_site{$pep_len}{'phase'} = $overlap_len; # positions of exon boundary | |
672 } | |
673 | |
674 my $seqsplice = ''; | |
675 foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end | |
676 next if (defined($splice_site{$pep_len}{'overlap'})); | |
677 next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein | |
678 my $length = $pep_len; | |
679 $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'}); | |
680 my $peptide; | |
681 $peptide = substr($sequence,$length,length($sequence),''); | |
682 $seqsplice = $peptide . $seqsplice; | |
683 $seqsplice = 'o' . $seqsplice if (0 == $splice_site{$pep_len}{'phase'}); | |
684 $seqsplice = 'b' . $seqsplice if (1 == $splice_site{$pep_len}{'phase'}); | |
685 $seqsplice = 'j' . $seqsplice if (2 == $splice_site{$pep_len}{'phase'}); | |
686 } | |
687 $seqsplice = $sequence . $seqsplice; # First exon AS IS | |
688 | |
689 return $seqsplice; | |
690 } | |
691 | |
692 sub sequence_cds { | |
693 my $self = shift; | |
694 | |
695 if(@_) { | |
696 $self->{'_sequence_cds'} = shift; | |
697 return $self->{'_sequence_cds'}; | |
698 } | |
699 | |
700 if(!defined($self->{'_sequence_cds'})) { | |
701 $self->{'_sequence_cds'} = $self->adaptor->db->get_SequenceAdaptor->fetch_sequence_cds_by_member_id($self->member_id); | |
702 } | |
703 | |
704 if(!defined($self->{'_sequence_cds'})) { | |
705 $self->{'_sequence_cds'} = $self->get_Transcript->translateable_seq; | |
706 } | |
707 | |
708 return $self->{'_sequence_cds'}; | |
709 } | |
710 | |
711 # GJ 2008-11-17 | |
712 # Returns the amino acid sequence with exon boundaries denoted as O, B, or J depending on the phase (O=0, B=1, J=2) | |
713 sub get_exon_bounded_sequence { | |
714 my $self = shift; | |
715 my $numbers = shift; | |
716 my $transcript = $self->get_Transcript; | |
717 | |
718 # The get_all_translateable_exons creates a list of reformatted "translateable" exon sequences. | |
719 # When the exon phase is 1 or 2, there will be duplicated residues at the end and start of exons. | |
720 # We'll deal with this during the exon loop. | |
721 my @exons = @{$transcript->get_all_translateable_Exons}; | |
722 my $seq_string = ""; | |
723 # for my $ex (@exons) { | |
724 while (my $ex = shift @exons) { | |
725 my $seq = $ex->peptide($transcript)->seq; | |
726 | |
727 # PHASE HANDLING | |
728 my $phase = $ex->phase; | |
729 my $end_phase = $ex->end_phase; | |
730 | |
731 # First, cut off repeated end residues. | |
732 if ($end_phase == 1 && 0 < scalar @exons) { | |
733 # We only own 1/3, so drop the last residue. | |
734 $seq = substr($seq,0,-1); | |
735 } | |
736 | |
737 # Now cut off repeated start residues. | |
738 if ($phase == 2) { | |
739 # We only own 1/3, so drop the first residue. | |
740 $seq = substr($seq, 1); | |
741 } | |
742 | |
743 if ($end_phase > -1) { | |
744 $seq = $seq . 'o' if ($end_phase == 0); | |
745 $seq = $seq . 'b' if ($end_phase == 1); | |
746 $seq = $seq . 'j' if ($end_phase == 2); | |
747 } | |
748 #print "Start_phase: $phase End_phase: $end_phase\t$seq\n"; | |
749 $seq_string .= $seq; | |
750 } | |
751 if (defined $numbers) { | |
752 $seq_string =~ s/o/0/g; $seq_string =~ s/b/1/g; $seq_string =~ s/j/2/g; | |
753 } | |
754 return $seq_string; | |
755 } | |
756 | |
757 =head2 seq_length | |
758 | |
759 Example : my $seq_length = $member->seq_length; | |
760 Description: get the sequence length of this member | |
761 Returntype : int | |
762 Exceptions : none | |
763 Caller : general | |
764 | |
765 =cut | |
766 | |
767 sub seq_length { | |
768 my $self = shift; | |
769 | |
770 unless(defined($self->{'_seq_length'})) { | |
771 #need to check case if user is calling seq_length first | |
772 #call $self->sequence (to lazy load if needed) | |
773 my $seq = $self->sequence; | |
774 $self->{'_seq_length'} = length($seq) if(defined($seq)); | |
775 } | |
776 return $self->{'_seq_length'}; | |
777 } | |
778 | |
779 | |
780 =head2 sequence_id | |
781 | |
782 Arg [1] : int $sequence_id | |
783 Example : my $sequence_id = $member->sequence_id; | |
784 Description: Extracts the sequence_id of this member | |
785 Returntype : int | |
786 Exceptions : none | |
787 Caller : general | |
788 | |
789 =cut | |
790 | |
791 sub sequence_id { | |
792 my $self = shift; | |
793 $self->{'_sequence_id'} = shift if(@_); | |
794 if(!defined($self->{'_sequence_id'})) { $self->{'_sequence_id'}=0; } | |
795 return $self->{'_sequence_id'}; | |
796 } | |
797 | |
798 =head2 gene_member_id | |
799 | |
800 Arg [1] : int $gene_member_id | |
801 Example : my $gene_member_id = $member->gene_member_id; | |
802 Description: Gene_member_id of this protein member | |
803 Returntype : int | |
804 Exceptions : none | |
805 Caller : general | |
806 | |
807 =cut | |
808 | |
809 sub gene_member_id { | |
810 my $self = shift; | |
811 $self->{'_gene_member_id'} = shift if(@_); | |
812 return $self->{'_gene_member_id'}; | |
813 } | |
814 | |
815 | |
816 =head2 bioseq | |
817 | |
818 Args : none | |
819 Example : my $primaryseq = $member->primaryseq; | |
820 Description: returns sequence this member as a Bio::Seq object | |
821 Returntype : Bio::Seq object | |
822 Exceptions : none | |
823 Caller : general | |
824 | |
825 =cut | |
826 | |
827 sub bioseq { | |
828 my $self = shift; | |
829 | |
830 throw("Member stable_id undefined") unless defined($self->stable_id()); | |
831 throw("No sequence for member " . $self->stable_id()) unless defined($self->sequence()); | |
832 | |
833 my $seqname; | |
834 if (defined($self->genome_db_id) and defined($self->dbID)) { | |
835 $seqname = "IDs:" . $self->genome_db_id . ":" . $self->dbID . " " . | |
836 $self->source_name . ":" . $self->stable_id; | |
837 } else { | |
838 $seqname = $self->source_name . ":" . $self->stable_id; | |
839 } | |
840 my $seq = Bio::Seq->new(-seq => $self->sequence(), | |
841 -primary_id => "member_id_".$self->dbID, | |
842 -display_id => "member_id_".$self->dbID, | |
843 -desc => $seqname ."|". $self->description(), | |
844 ); | |
845 return $seq; | |
846 } | |
847 | |
848 =head2 gene_member | |
849 | |
850 Arg[1] : Bio::EnsEMBL::Compara::Member $geneMember (optional) | |
851 Example : my $gene_member = $member->gene_member; | |
852 Description: returns gene member object for this protein member | |
853 Returntype : Bio::EnsEMBL::Compara::Member object | |
854 Exceptions : if arg[0] is not a Bio::EnsEMBL::Compara::Member object | |
855 Caller : MemberAdaptor(set), general | |
856 | |
857 =cut | |
858 | |
859 sub gene_member { | |
860 my $self = shift; | |
861 my $gene_member = shift; | |
862 | |
863 if ($gene_member) { | |
864 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$gene_member]") | |
865 unless ($gene_member->isa('Bio::EnsEMBL::Compara::Member')); | |
866 $self->{'_gene_member'} = $gene_member; | |
867 } | |
868 if(!defined($self->{'_gene_member'}) and | |
869 defined($self->adaptor) and $self->dbID) | |
870 { | |
871 $self->{'_gene_member'} = $self->adaptor->db->get_MemberAdaptor->fetch_gene_for_peptide_member_id($self->dbID); | |
872 } | |
873 return $self->{'_gene_member'}; | |
874 } | |
875 | |
876 =head2 print_member | |
877 | |
878 Arg[1] : string to be prrinted instead of "\n" | |
879 Example : $member->print_member("BRH"); | |
880 Description: used for debugging, prints out key descriptive elements | |
881 of member | |
882 Returntype : none | |
883 Exceptions : none | |
884 Caller : general | |
885 | |
886 =cut | |
887 | |
888 sub print_member | |
889 | |
890 { | |
891 my $self = shift; | |
892 my $postfix = shift; | |
893 | |
894 printf(" %s %s(%d)\t%s : %d-%d",$self->source_name, $self->stable_id, | |
895 $self->dbID,$self->chr_name,$self->chr_start, $self->chr_end); | |
896 if($postfix) { print(" $postfix"); } | |
897 else { print("\n"); } | |
898 } | |
899 | |
900 | |
901 =head2 get_Gene | |
902 | |
903 Args : none | |
904 Example : $gene = $member->get_Gene | |
905 Description: if member is an 'ENSEMBLGENE' returns Bio::EnsEMBL::Gene object | |
906 by connecting to ensembl genome core database | |
907 REQUIRES properly setup Registry conf file or | |
908 manually setting genome_db->db_adaptor for each genome. | |
909 Returntype : Bio::EnsEMBL::Gene or undef | |
910 Exceptions : none | |
911 Caller : general | |
912 | |
913 =cut | |
914 | |
915 sub get_Gene { | |
916 my $self = shift; | |
917 | |
918 return $self->{'core_gene'} if($self->{'core_gene'}); | |
919 | |
920 unless($self->genome_db and | |
921 $self->genome_db->db_adaptor and | |
922 $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) | |
923 { | |
924 throw("unable to connect to core ensembl database: missing registry and genome_db.locator"); | |
925 } | |
926 | |
927 my $coreDBA = $self->genome_db->db_adaptor; | |
928 if($self->source_name eq 'ENSEMBLGENE') { | |
929 $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->stable_id); | |
930 } | |
931 if($self->source_name eq 'ENSEMBLPEP') { | |
932 $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->gene_member->stable_id); | |
933 } | |
934 return $self->{'core_gene'}; | |
935 } | |
936 | |
937 =head2 get_Transcript | |
938 | |
939 Args : none | |
940 Example : $transcript = $member->get_Transcript | |
941 Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Transcript object | |
942 by connecting to ensembl genome core database | |
943 REQUIRES properly setup Registry conf file or | |
944 manually setting genome_db->db_adaptor for each genome. | |
945 Returntype : Bio::EnsEMBL::Transcript or undef | |
946 Exceptions : none | |
947 Caller : general | |
948 | |
949 =cut | |
950 | |
951 sub get_Transcript { | |
952 my $self = shift; | |
953 | |
954 return undef unless($self->source_name eq 'ENSEMBLPEP'); | |
955 return $self->{'core_transcript'} if($self->{'core_transcript'}); | |
956 | |
957 unless($self->genome_db and | |
958 $self->genome_db->db_adaptor and | |
959 $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) | |
960 { | |
961 throw("unable to connect to core ensembl database: missing registry and genome_db.locator"); | |
962 } | |
963 my $coreDBA = $self->genome_db->db_adaptor; | |
964 $self->{'core_transcript'} = $coreDBA->get_TranscriptAdaptor->fetch_by_translation_stable_id($self->stable_id); | |
965 return $self->{'core_transcript'}; | |
966 } | |
967 | |
968 | |
969 =head2 get_Translation | |
970 | |
971 Args : none | |
972 Example : $translation = $member->get_Translation | |
973 Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Translation object | |
974 by connecting to ensembl genome core database | |
975 REQUIRES properly setup Registry conf file or | |
976 manually setting genome_db->db_adaptor for each genome. | |
977 Returntype : Bio::EnsEMBL::Gene or undef | |
978 Exceptions : none | |
979 Caller : general | |
980 | |
981 =cut | |
982 | |
983 sub get_Translation { | |
984 my $self = shift; | |
985 | |
986 if($self->get_Transcript) { | |
987 my $transcript = $self->get_Transcript; | |
988 return $transcript->translation(); | |
989 } | |
990 return undef; | |
991 } | |
992 | |
993 sub gene { | |
994 my $self = shift; | |
995 deprecate('Use get_Gene() instead'); | |
996 return $self->get_Gene; | |
997 } | |
998 sub transcript { | |
999 my $self = shift; | |
1000 deprecate('Use get_Transcript() instead'); | |
1001 return $self->get_Transcript; | |
1002 } | |
1003 sub translation { | |
1004 my $self = shift; | |
1005 deprecate('Use get_Translation() instead'); | |
1006 return $self->get_Translation(); | |
1007 } | |
1008 | |
1009 | |
1010 =head2 get_canonical_Member | |
1011 | |
1012 Args : none | |
1013 Example : $canonicalMember = $member->get_canonical_Member | |
1014 Description: if member is an "ENSEMBLGENE" it will return the canonical peptide / transcript member | |
1015 if member is an 'ENSEMBLPEP' it will get its gene member and have it | |
1016 if member is an 'ENSEMBLTRANS' it will get its gene member and have it | |
1017 return the canonical peptide / transcript (which could be the same as the starting member) | |
1018 Returntype : Bio::EnsEMBL::Compara::Member or undef | |
1019 Exceptions : none | |
1020 Caller : general | |
1021 | |
1022 =cut | |
1023 | |
1024 sub get_canonical_Member { | |
1025 my $self = shift; | |
1026 | |
1027 return unless($self->adaptor); | |
1028 | |
1029 my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_canonical_member_for_gene_member_id') | |
1030 ? $self->adaptor # a MemberAdaptor or derivative | |
1031 : $self->adaptor->db->get_MemberAdaptor; | |
1032 | |
1033 if($self->source_name eq 'ENSEMBLGENE') { | |
1034 | |
1035 return $able_adaptor->fetch_canonical_member_for_gene_member_id($self->dbID); | |
1036 | |
1037 } elsif(($self->source_name eq 'ENSEMBLPEP') or ($self->source_name eq 'ENSEMBLTRANS')) { | |
1038 | |
1039 my $geneMember = $self->gene_member or return; | |
1040 | |
1041 return $able_adaptor->fetch_canonical_member_for_gene_member_id($geneMember->dbID); | |
1042 | |
1043 } else { | |
1044 | |
1045 return undef; | |
1046 } | |
1047 } | |
1048 | |
1049 | |
1050 =head2 get_canonical_peptide_Member | |
1051 | |
1052 Description: DEPRECATED. Use get_canonical_Member() instead | |
1053 | |
1054 =cut | |
1055 | |
1056 sub get_canonical_peptide_Member { | |
1057 my $self = shift; | |
1058 | |
1059 deprecate('Use get_canonical_Member() instead'); | |
1060 return $self->get_canonical_Member(@_); | |
1061 } | |
1062 | |
1063 | |
1064 =head2 get_canonical_transcript_Member | |
1065 | |
1066 Description: DEPRECATED. Use get_canonical_Member() instead | |
1067 | |
1068 =cut | |
1069 | |
1070 sub get_canonical_transcript_Member { | |
1071 my $self = shift; | |
1072 | |
1073 deprecate('Use get_canonical_Member() instead'); | |
1074 return $self->get_canonical_Member(@_); | |
1075 } | |
1076 | |
1077 | |
1078 =head2 get_all_peptide_Members | |
1079 | |
1080 Args : none | |
1081 Example : $pepMembers = $gene_member->get_all_peptide_Members | |
1082 Description: return listref of all peptide members of this gene_member | |
1083 Returntype : array ref of Bio::EnsEMBL::Compara::Member | |
1084 Exceptions : throw if not an ENSEMBLGENE | |
1085 Caller : general | |
1086 | |
1087 =cut | |
1088 | |
1089 sub get_all_peptide_Members { | |
1090 my $self = shift; | |
1091 | |
1092 throw("adaptor undefined, can access database") unless($self->adaptor); | |
1093 throw("not an ENSEMBLGENE member") if($self->source_name ne 'ENSEMBLGENE'); | |
1094 | |
1095 my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_all_peptides_for_gene_member_id') | |
1096 ? $self->adaptor # a MemberAdaptor or derivative | |
1097 : $self->adaptor->db->get_MemberAdaptor; | |
1098 | |
1099 | |
1100 return $able_adaptor->fetch_all_peptides_for_gene_member_id($self->dbID); | |
1101 } | |
1102 | |
1103 | |
1104 1; |