comparison variant_effect_predictor/Bio/EnsEMBL/Compara/Member.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 package Bio::EnsEMBL::Compara::Member;
2
3 use strict;
4 use Bio::Seq;
5 use Bio::EnsEMBL::Utils::Argument;
6 use Bio::EnsEMBL::Utils::Exception;
7 use Bio::EnsEMBL::Gene;
8 use Bio::EnsEMBL::Compara::GenomeDB;
9
10
11 =head2 new (CONSTRUCTOR)
12
13 Arg [-DBID] : (opt)
14 : int $dbID (the database internal ID for this object)
15 Arg [-ADAPTOR]
16 : Bio::EnsEMBL::Compara::DBSQL::Member $adaptor
17 (the adaptor for connecting to the database)
18 Arg [-DESCRIPTION] (opt)
19 : string $description
20 Arg [-SOURCE_NAME] (opt)
21 : string $source_name
22 (e.g., "ENSEMBLGENE", "ENSEMBLPEP", "Uniprot/SWISSPROT", "Uniprot/SPTREMBL")
23 Arg [-TAXON_ID] (opt)
24 : int $taxon_id
25 (NCBI taxonomy id for the member)
26 Arg [-GENOME_DB_ID] (opt)
27 : int $genome_db_id
28 (the $genome_db->dbID for a species in the database)
29 Arg [-SEQUENCE_ID] (opt)
30 : int $sequence_id
31 (the $sequence_id for the sequence table in the database)
32 Example :
33 my $member = new Bio::EnsEMBL::Compara::Member;
34 Description: Creates a new Member object
35 Returntype : Bio::EnsEMBL::Compara::Member
36 Exceptions : none
37 Caller : general
38 Status : Stable
39
40 =cut
41
42 sub new {
43 my ($class, @args) = @_;
44
45 my $self = bless {}, $class;
46
47 if (scalar @args) {
48 #do this explicitly.
49 my ($dbid, $stable_id, $description, $source_name, $adaptor, $taxon_id, $genome_db_id, $sequence_id) = rearrange([qw(DBID STABLE_ID DESCRIPTION SOURCE_NAME ADAPTOR TAXON_ID GENOME_DB_ID SEQUENCE_ID)], @args);
50
51 $dbid && $self->dbID($dbid);
52 $stable_id && $self->stable_id($stable_id);
53 $description && $self->description($description);
54 $source_name && $self->source_name($source_name);
55 $adaptor && $self->adaptor($adaptor);
56 $taxon_id && $self->taxon_id($taxon_id);
57 $genome_db_id && $self->genome_db_id($genome_db_id);
58 $sequence_id && $self->sequence_id($sequence_id);
59 }
60
61 return $self;
62 }
63
64
65 =head2 copy
66
67 Arg [1] : object $parent_object (optional)
68 Example :
69 Description: copies the object, optionally by topping up a given structure (to support multiple inheritance)
70 Returntype :
71 Exceptions :
72 Caller :
73
74 =cut
75
76 sub copy {
77 my $self = shift;
78
79 my $mycopy = @_ ? shift : {};
80 bless $mycopy, 'Bio::EnsEMBL::Compara::Member';
81
82 $mycopy->dbID($self->dbID);
83 $mycopy->stable_id($self->stable_id);
84 $mycopy->version($self->version);
85 $mycopy->description($self->description);
86 $mycopy->source_name($self->source_name);
87 #$mycopy->adaptor($self->adaptor);
88 $mycopy->chr_name($self->chr_name);
89 $mycopy->chr_start($self->chr_start);
90 $mycopy->chr_end($self->chr_end);
91 $mycopy->chr_strand($self->chr_strand);
92 $mycopy->taxon_id($self->taxon_id);
93 $mycopy->genome_db_id($self->genome_db_id);
94 $mycopy->sequence_id($self->sequence_id);
95 $mycopy->gene_member_id($self->gene_member_id);
96 $mycopy->display_label($self->display_label);
97
98 return $mycopy;
99 }
100
101
102 =head2 new_fast
103
104 Arg [1] : hash reference $hashref
105 Example : none
106 Description: This is an ultra fast constructor which requires knowledge of
107 the objects internals to be used.
108 Returntype :
109 Exceptions : none
110 Caller :
111
112 =cut
113
114 sub new_fast {
115 my ($class, $hashref) = @_;
116
117 return bless $hashref, $class;
118 }
119
120 =head2 new_from_gene
121
122 Args : Requires both an Bio::Ensembl:Gene object and a
123 : Bio::Ensembl:Compara:GenomeDB object
124 Example : $member = Bio::EnsEMBL::Compara::Member->new_from_gene(
125 -gene => $gene,
126 -genome_db => $genome_db);
127 Description: contructor method which takes an Ensembl::Gene object
128 and Compara:GenomeDB object and creates a new Member object
129 translating from the Gene object
130 Returntype : Bio::Ensembl::Compara::Member
131 Exceptions :
132 Caller :
133
134 =cut
135
136 sub new_from_gene {
137 my ($class, @args) = @_;
138 my $self = $class->new(@args);
139
140 if (scalar @args) {
141
142 my ($gene, $genome_db) = rearrange([qw(GENE GENOME_DB)], @args);
143
144 unless(defined($gene) and $gene->isa('Bio::EnsEMBL::Gene')) {
145 throw(
146 "gene arg must be a [Bio::EnsEMBL::Gene] ".
147 "not a [$gene]");
148 }
149 unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
150 throw(
151 "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ".
152 "not a [$genome_db]");
153 }
154 unless (defined $gene->stable_id) {
155 throw("COREDB error: does not contain gene_stable_id for gene_id ". $gene->dbID."\n");
156 }
157
158 $self->stable_id($gene->stable_id);
159 $self->taxon_id($genome_db->taxon_id);
160 $self->description($gene->description);
161 $self->genome_db_id($genome_db->dbID);
162 $self->chr_name($gene->seq_region_name);
163 $self->chr_start($gene->seq_region_start);
164 $self->chr_end($gene->seq_region_end);
165 $self->chr_strand($gene->seq_region_strand);
166 $self->source_name("ENSEMBLGENE");
167 $self->version($gene->version);
168 }
169 return $self;
170 }
171
172
173 =head2 new_from_transcript
174
175 Arg[1] : Bio::Ensembl:Transcript object
176 Arg[2] : Bio::Ensembl:Compara:GenomeDB object
177 Arg[3] : string where value='translate' causes transcript object to translate
178 to a peptide
179 Example : $member = Bio::EnsEMBL::Compara::Member->new_from_transcript(
180 $transcript, $genome_db,
181 -translate);
182 Description: contructor method which takes an Ensembl::Gene object
183 and Compara:GenomeDB object and creates a new Member object
184 translating from the Gene object
185 Returntype : Bio::Ensembl::Compara::Member
186 Exceptions :
187 Caller :
188
189 =cut
190
191 sub new_from_transcript {
192 my ($class, @args) = @_;
193 my $self = $class->new(@args);
194 my $peptideBioSeq;
195 my $seq_string;
196
197 my ($transcript, $genome_db, $translate, $description) = rearrange([qw(TRANSCRIPT GENOME_DB TRANSLATE DESCRIPTION)], @args);
198 #my ($transcript, $genome_db, $translate) = @args;
199
200 unless(defined($transcript) and $transcript->isa('Bio::EnsEMBL::Transcript')) {
201 throw(
202 "transcript arg must be a [Bio::EnsEMBL::Transcript]".
203 "not a [$transcript]");
204 }
205 unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
206 throw(
207 "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ".
208 "not a [$genome_db]");
209 }
210 $self->taxon_id($genome_db->taxon_id);
211 if(defined($description)) { $self->description($description); }
212 else { $self->description("NULL"); }
213 $self->genome_db_id($genome_db->dbID);
214 $self->chr_name($transcript->seq_region_name);
215 $self->chr_start($transcript->coding_region_start);
216 $self->chr_end($transcript->coding_region_end);
217 $self->chr_strand($transcript->seq_region_strand);
218 $self->version($transcript->translation->version) if ($translate eq 'yes');
219
220 if(($translate eq 'translate') or ($translate eq 'yes')) {
221 if(not defined($transcript->translation)) {
222 throw("request to translate a transcript without a defined translation",
223 $transcript->stable_id);
224 }
225 unless (defined $transcript->translation->stable_id) {
226 throw("COREDB error: does not contain translation stable id for translation_id ".$transcript->translation->dbID."\n");
227 }
228
229 $self->stable_id($transcript->translation->stable_id);
230 $self->source_name("ENSEMBLPEP");
231
232 unless ($peptideBioSeq = $transcript->translate) {
233 throw("COREDB error: unable to get a BioSeq translation from ". $transcript->stable_id);
234 }
235 eval {
236 $seq_string = $peptideBioSeq->seq;
237 };
238 throw "COREDB error: can't get seq from peptideBioSeq" if $@;
239 # OR
240 #$seq_string = $transcript->translation->seq;
241
242 if ($seq_string =~ /^X+$/) {
243 warn("X+ in sequence from translation " . $transcript->translation->stable_id."\n");
244 }
245 elsif (length($seq_string) == 0) {
246 warn("zero length sequence from translation " . $transcript->translation->stable_id."\n");
247 }
248 else {
249 #$seq_string =~ s/(.{72})/$1\n/g;
250 $self->sequence($seq_string);
251 }
252 } elsif ($translate eq 'ncrna') {
253 unless (defined $transcript->stable_id) {
254 throw("COREDB error: does not contain transcript stable id for transcript_id ".$transcript->dbID."\n");
255 }
256 $self->stable_id($transcript->stable_id);
257 $self->source_name("ENSEMBLTRANS");
258
259 unless ($seq_string = $transcript->spliced_seq) {
260 throw("COREDB error: unable to get a BioSeq spliced_seq from ". $transcript->stable_id);
261 }
262 if (length($seq_string) == 0) {
263 warn("zero length sequence from transcript " . $transcript->stable_id."\n");
264 }
265 $self->sequence($seq_string);
266 }
267
268 #print("Member->new_from_transcript\n");
269 #print(" source_name = '" . $self->source_name . "'\n");
270 #print(" stable_id = '" . $self->stable_id . "'\n");
271 #print(" taxon_id = '" . $self->taxon_id . "'\n");
272 #print(" chr_name = '" . $self->chr_name . "'\n");
273 return $self;
274 }
275
276
277 =head2 member_id
278
279 Arg [1] : int $member_id (optional)
280 Example :
281 Description:
282 Returntype :
283 Exceptions :
284 Caller :
285
286 =cut
287
288 sub member_id {
289 my $self = shift;
290 return $self->dbID(@_);
291 }
292
293
294 =head2 dbID
295
296 Arg [1] : int $dbID (optional)
297 Example :
298 Description:
299 Returntype :
300 Exceptions :
301 Caller :
302
303 =cut
304
305 sub dbID {
306 my $self = shift;
307 $self->{'_dbID'} = shift if(@_);
308 return $self->{'_dbID'};
309 }
310
311 =head2 stable_id
312
313 Arg [1] : string $stable_id (optional)
314 Example :
315 Description:
316 Returntype :
317 Exceptions :
318 Caller :
319
320 =cut
321
322 sub stable_id {
323 my $self = shift;
324 $self->{'_stable_id'} = shift if(@_);
325 return $self->{'_stable_id'};
326 }
327
328 =head2 display_label
329
330 Arg [1] : string $display_label (optional)
331 Example :
332 Description:
333 Returntype :
334 Exceptions :
335 Caller :
336
337 =cut
338
339 sub display_label {
340 my $self = shift;
341 $self->{'_display_label'} = shift if(@_);
342 return $self->{'_display_label'};
343 }
344
345 =head2 version
346
347 Arg [1] :
348 Example :
349 Description:
350 Returntype :
351 Exceptions :
352 Caller :
353
354 =cut
355
356 sub version {
357 my $self = shift;
358 $self->{'_version'} = shift if(@_);
359 $self->{'_version'} = 0 unless(defined($self->{'_version'}));
360 return $self->{'_version'};
361 }
362
363 =head2 description
364
365 Arg [1] : string $description (optional)
366 Example :
367 Description:
368 Returntype : string
369 Exceptions :
370 Caller :
371
372 =cut
373
374 sub description {
375 my $self = shift;
376 $self->{'_description'} = shift if(@_);
377 return $self->{'_description'};
378 }
379
380 =head2 source_name
381
382 =cut
383
384 sub source_name {
385 my $self = shift;
386 $self->{'_source_name'} = shift if (@_);
387 return $self->{'_source_name'};
388 }
389
390 =head2 adaptor
391
392 Arg [1] : string $adaptor (optional)
393 corresponding to a perl module
394 Example :
395 Description:
396 Returntype :
397 Exceptions :
398 Caller :
399
400 =cut
401
402 sub adaptor {
403 my $self = shift;
404 $self->{'_adaptor'} = shift if(@_);
405 return $self->{'_adaptor'};
406 }
407
408 =head2 chr_name
409
410 =cut
411
412 sub chr_name {
413 my $self = shift;
414 $self->{'_chr_name'} = shift if (@_);
415 return $self->{'_chr_name'};
416 }
417
418 =head2 chr_start
419
420 =cut
421
422 sub chr_start {
423 my $self = shift;
424 $self->{'_chr_start'} = shift if (@_);
425 return $self->{'_chr_start'};
426 }
427
428 =head2 chr_end
429
430 =cut
431
432 sub chr_end {
433 my $self = shift;
434 $self->{'_chr_end'} = shift if (@_);
435 return $self->{'_chr_end'};
436 }
437
438 =head2 chr_strand
439
440 Arg [1] : integer
441 Description: Returns the strand of the member. Defined strands are 1 or -1.
442 0 is undefined strand.
443 Returntype : 1,0,-1
444 Exceptions : none
445 Caller : general
446
447 =cut
448
449 sub chr_strand {
450 my $self = shift;
451 $self->{'_chr_strand'} = shift if (@_);
452 $self->{'_chr_strand'}='0' unless(defined($self->{'_chr_strand'}));
453 return $self->{'_chr_strand'};
454 }
455
456 =head2 taxon_id
457
458 =cut
459
460 sub taxon_id {
461 my $self = shift;
462 $self->{'_taxon_id'} = shift if (@_);
463 return $self->{'_taxon_id'};
464 }
465
466 =head2 taxon
467
468 =cut
469
470 sub taxon {
471 my $self = shift;
472
473 if (@_) {
474 my $taxon = shift;
475 unless ($taxon->isa('Bio::EnsEMBL::Compara::NCBITaxon')) {
476 throw(
477 "taxon arg must be a [Bio::EnsEMBL::Compara::NCBITaxon".
478 "not a [$taxon]");
479 }
480 $self->{'_taxon'} = $taxon;
481 $self->taxon_id($taxon->ncbi_taxid);
482 } else {
483 unless (defined $self->{'_taxon'}) {
484 unless (defined $self->taxon_id) {
485 throw("can't fetch Taxon without a taxon_id");
486 }
487 my $NCBITaxonAdaptor = $self->adaptor->db->get_NCBITaxonAdaptor;
488 $self->{'_taxon'} = $NCBITaxonAdaptor->fetch_node_by_taxon_id($self->taxon_id);
489 }
490 }
491
492 return $self->{'_taxon'};
493 }
494
495 =head2 genome_db_id
496
497 =cut
498
499 sub genome_db_id {
500 my $self = shift;
501 $self->{'_genome_db_id'} = shift if (@_);
502 return $self->{'_genome_db_id'};
503 }
504
505 =head2 genome_db
506
507 =cut
508
509 sub genome_db {
510 my $self = shift;
511
512 if (@_) {
513 my $genome_db = shift;
514 unless ($genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
515 throw(
516 "arg must be a [Bio::EnsEMBL::Compara::GenomeDB".
517 "not a [$genome_db]");
518 }
519 $self->{'_genome_db'} = $genome_db;
520 $self->genome_db_id($genome_db->dbID);
521 } else {
522 unless (defined $self->{'_genome_db'}) {
523 unless (defined $self->genome_db_id and defined $self->adaptor) {
524 throw("can't fetch GenomeDB without an adaptor and genome_db_id");
525 }
526 my $GenomeDBAdaptor = $self->adaptor->db->get_GenomeDBAdaptor;
527 $self->{'_genome_db'} = $GenomeDBAdaptor->fetch_by_dbID($self->genome_db_id);
528 }
529 }
530
531 return $self->{'_genome_db'};
532 }
533
534 =head2 sequence
535
536 Arg [1] : string $sequence
537 Example : my $seq = $member->sequence;
538 Description: Get/set the sequence string of this member
539 Will lazy load by sequence_id if needed and able
540 Returntype : string
541 Exceptions : none
542 Caller : general
543
544 =cut
545
546 sub sequence {
547 my $self = shift;
548
549 if(@_) {
550 $self->{'_seq_length'} = undef;
551 $self->{'_sequence'} = shift;
552 $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'}));
553 return $self->{'_sequence'};
554 }
555
556 if(!defined($self->{'_sequence'}) and
557 defined($self->sequence_id()) and
558 defined($self->adaptor))
559 {
560 $self->{'_sequence'} = $self->adaptor->db->get_SequenceAdaptor->fetch_by_dbID($self->sequence_id);
561 $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'}));
562 }
563
564 return $self->{'_sequence'};
565 }
566
567 =head2 sequence_exon_cased
568
569 Args : none
570 Example : my $sequence_exon_cased = $member->sequence_exon_cased;
571
572 Description: Get/set the sequence string of this peptide member with
573 alternating upper and lower case corresponding to the translateable exons.
574 Returntype : string
575 Exceptions : none
576 Caller : general
577
578 =cut
579
580 sub sequence_exon_cased {
581 my $self = shift;
582
583 my $sequence = $self->sequence;
584 my $trans = $self->get_Transcript;
585 my @exons = @{$trans->get_all_translateable_Exons};
586 return $sequence if (1 == scalar @exons);
587
588 my %splice_site;
589 my $pep_len = 0;
590 my $overlap_len = 0;
591 while (my $exon = shift @exons) {
592 my $exon_len = $exon->length;
593 my $pep_seq = $exon->peptide($trans)->length;
594 # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq
595 $pep_seq -= 1 if ($overlap_len);
596 $pep_len += $pep_seq;
597 if ($overlap_len = (($exon_len + $overlap_len ) %3)){ # if there is an overlap
598 $splice_site{$pep_len-1}{'overlap'} = $pep_len -1; # stores overlapping aa-exon boundary
599 } else {
600 $overlap_len = 0;
601 }
602 $splice_site{$pep_len}{'phase'} = $overlap_len; # positions of exon boundary
603 }
604
605 my @exon_sequences = ();
606 foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end
607 next if (defined($splice_site{$pep_len}{'overlap'}));
608 next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein
609 my $length = $pep_len;
610 $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'});
611 my $peptide;
612 $peptide = substr($sequence,$length,length($sequence),'');
613 unshift(@exon_sequences, $peptide);
614 }
615 unshift(@exon_sequences, $sequence); # First exon (last piece of sequence left)
616
617 my $splice = 1;
618 foreach my $exon_sequence (@exon_sequences) {
619 if ($splice % 2 == 0) {
620 $exon_sequence = lc($exon_sequence);
621 }
622 $splice++;
623 }
624 my $seqsplice = join("", @exon_sequences);
625
626 return $seqsplice;
627 }
628
629 sub sequence_exon_bounded {
630 my $self = shift;
631
632 if(@_) {
633 $self->{'_sequence_exon_bounded'} = shift;
634 return $self->{'_sequence_exon_bounded'};
635 }
636
637 if(!defined($self->{'_sequence_exon_bounded'})) {
638 $self->{'_sequence_exon_bounded'} = $self->adaptor->db->get_SequenceAdaptor->fetch_sequence_exon_bounded_by_member_id($self->member_id);
639 }
640
641 if(!defined($self->{'_sequence_exon_bounded'})) {
642 $self->{'_sequence_exon_bounded'} = $self->_compose_sequence_exon_bounded;
643 }
644
645 return $self->{'_sequence_exon_bounded'};
646 }
647
648
649 sub _compose_sequence_exon_bounded {
650 my $self = shift;
651
652 my $sequence = $self->sequence;
653 my $trans = $self->get_Transcript;
654 my @exons = @{$trans->get_all_translateable_Exons};
655 return $sequence if (1 == scalar @exons);
656
657 my %splice_site;
658 my $pep_len = 0;
659 my $overlap_len = 0;
660 while (my $exon = shift @exons) {
661 my $exon_len = $exon->length;
662 my $pep_seq = $exon->peptide($trans)->length;
663 # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq
664 $pep_seq -= 1 if ($overlap_len);
665 $pep_len += $pep_seq;
666 if ($overlap_len = (($exon_len + $overlap_len ) %3)){ # if there is an overlap
667 $splice_site{$pep_len-1}{'overlap'} = $pep_len -1; # stores overlapping aa-exon boundary
668 } else {
669 $overlap_len = 0;
670 }
671 $splice_site{$pep_len}{'phase'} = $overlap_len; # positions of exon boundary
672 }
673
674 my $seqsplice = '';
675 foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end
676 next if (defined($splice_site{$pep_len}{'overlap'}));
677 next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein
678 my $length = $pep_len;
679 $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'});
680 my $peptide;
681 $peptide = substr($sequence,$length,length($sequence),'');
682 $seqsplice = $peptide . $seqsplice;
683 $seqsplice = 'o' . $seqsplice if (0 == $splice_site{$pep_len}{'phase'});
684 $seqsplice = 'b' . $seqsplice if (1 == $splice_site{$pep_len}{'phase'});
685 $seqsplice = 'j' . $seqsplice if (2 == $splice_site{$pep_len}{'phase'});
686 }
687 $seqsplice = $sequence . $seqsplice; # First exon AS IS
688
689 return $seqsplice;
690 }
691
692 sub sequence_cds {
693 my $self = shift;
694
695 if(@_) {
696 $self->{'_sequence_cds'} = shift;
697 return $self->{'_sequence_cds'};
698 }
699
700 if(!defined($self->{'_sequence_cds'})) {
701 $self->{'_sequence_cds'} = $self->adaptor->db->get_SequenceAdaptor->fetch_sequence_cds_by_member_id($self->member_id);
702 }
703
704 if(!defined($self->{'_sequence_cds'})) {
705 $self->{'_sequence_cds'} = $self->get_Transcript->translateable_seq;
706 }
707
708 return $self->{'_sequence_cds'};
709 }
710
711 # GJ 2008-11-17
712 # Returns the amino acid sequence with exon boundaries denoted as O, B, or J depending on the phase (O=0, B=1, J=2)
713 sub get_exon_bounded_sequence {
714 my $self = shift;
715 my $numbers = shift;
716 my $transcript = $self->get_Transcript;
717
718 # The get_all_translateable_exons creates a list of reformatted "translateable" exon sequences.
719 # When the exon phase is 1 or 2, there will be duplicated residues at the end and start of exons.
720 # We'll deal with this during the exon loop.
721 my @exons = @{$transcript->get_all_translateable_Exons};
722 my $seq_string = "";
723 # for my $ex (@exons) {
724 while (my $ex = shift @exons) {
725 my $seq = $ex->peptide($transcript)->seq;
726
727 # PHASE HANDLING
728 my $phase = $ex->phase;
729 my $end_phase = $ex->end_phase;
730
731 # First, cut off repeated end residues.
732 if ($end_phase == 1 && 0 < scalar @exons) {
733 # We only own 1/3, so drop the last residue.
734 $seq = substr($seq,0,-1);
735 }
736
737 # Now cut off repeated start residues.
738 if ($phase == 2) {
739 # We only own 1/3, so drop the first residue.
740 $seq = substr($seq, 1);
741 }
742
743 if ($end_phase > -1) {
744 $seq = $seq . 'o' if ($end_phase == 0);
745 $seq = $seq . 'b' if ($end_phase == 1);
746 $seq = $seq . 'j' if ($end_phase == 2);
747 }
748 #print "Start_phase: $phase End_phase: $end_phase\t$seq\n";
749 $seq_string .= $seq;
750 }
751 if (defined $numbers) {
752 $seq_string =~ s/o/0/g; $seq_string =~ s/b/1/g; $seq_string =~ s/j/2/g;
753 }
754 return $seq_string;
755 }
756
757 =head2 seq_length
758
759 Example : my $seq_length = $member->seq_length;
760 Description: get the sequence length of this member
761 Returntype : int
762 Exceptions : none
763 Caller : general
764
765 =cut
766
767 sub seq_length {
768 my $self = shift;
769
770 unless(defined($self->{'_seq_length'})) {
771 #need to check case if user is calling seq_length first
772 #call $self->sequence (to lazy load if needed)
773 my $seq = $self->sequence;
774 $self->{'_seq_length'} = length($seq) if(defined($seq));
775 }
776 return $self->{'_seq_length'};
777 }
778
779
780 =head2 sequence_id
781
782 Arg [1] : int $sequence_id
783 Example : my $sequence_id = $member->sequence_id;
784 Description: Extracts the sequence_id of this member
785 Returntype : int
786 Exceptions : none
787 Caller : general
788
789 =cut
790
791 sub sequence_id {
792 my $self = shift;
793 $self->{'_sequence_id'} = shift if(@_);
794 if(!defined($self->{'_sequence_id'})) { $self->{'_sequence_id'}=0; }
795 return $self->{'_sequence_id'};
796 }
797
798 =head2 gene_member_id
799
800 Arg [1] : int $gene_member_id
801 Example : my $gene_member_id = $member->gene_member_id;
802 Description: Gene_member_id of this protein member
803 Returntype : int
804 Exceptions : none
805 Caller : general
806
807 =cut
808
809 sub gene_member_id {
810 my $self = shift;
811 $self->{'_gene_member_id'} = shift if(@_);
812 return $self->{'_gene_member_id'};
813 }
814
815
816 =head2 bioseq
817
818 Args : none
819 Example : my $primaryseq = $member->primaryseq;
820 Description: returns sequence this member as a Bio::Seq object
821 Returntype : Bio::Seq object
822 Exceptions : none
823 Caller : general
824
825 =cut
826
827 sub bioseq {
828 my $self = shift;
829
830 throw("Member stable_id undefined") unless defined($self->stable_id());
831 throw("No sequence for member " . $self->stable_id()) unless defined($self->sequence());
832
833 my $seqname;
834 if (defined($self->genome_db_id) and defined($self->dbID)) {
835 $seqname = "IDs:" . $self->genome_db_id . ":" . $self->dbID . " " .
836 $self->source_name . ":" . $self->stable_id;
837 } else {
838 $seqname = $self->source_name . ":" . $self->stable_id;
839 }
840 my $seq = Bio::Seq->new(-seq => $self->sequence(),
841 -primary_id => "member_id_".$self->dbID,
842 -display_id => "member_id_".$self->dbID,
843 -desc => $seqname ."|". $self->description(),
844 );
845 return $seq;
846 }
847
848 =head2 gene_member
849
850 Arg[1] : Bio::EnsEMBL::Compara::Member $geneMember (optional)
851 Example : my $gene_member = $member->gene_member;
852 Description: returns gene member object for this protein member
853 Returntype : Bio::EnsEMBL::Compara::Member object
854 Exceptions : if arg[0] is not a Bio::EnsEMBL::Compara::Member object
855 Caller : MemberAdaptor(set), general
856
857 =cut
858
859 sub gene_member {
860 my $self = shift;
861 my $gene_member = shift;
862
863 if ($gene_member) {
864 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$gene_member]")
865 unless ($gene_member->isa('Bio::EnsEMBL::Compara::Member'));
866 $self->{'_gene_member'} = $gene_member;
867 }
868 if(!defined($self->{'_gene_member'}) and
869 defined($self->adaptor) and $self->dbID)
870 {
871 $self->{'_gene_member'} = $self->adaptor->db->get_MemberAdaptor->fetch_gene_for_peptide_member_id($self->dbID);
872 }
873 return $self->{'_gene_member'};
874 }
875
876 =head2 print_member
877
878 Arg[1] : string to be prrinted instead of "\n"
879 Example : $member->print_member("BRH");
880 Description: used for debugging, prints out key descriptive elements
881 of member
882 Returntype : none
883 Exceptions : none
884 Caller : general
885
886 =cut
887
888 sub print_member
889
890 {
891 my $self = shift;
892 my $postfix = shift;
893
894 printf(" %s %s(%d)\t%s : %d-%d",$self->source_name, $self->stable_id,
895 $self->dbID,$self->chr_name,$self->chr_start, $self->chr_end);
896 if($postfix) { print(" $postfix"); }
897 else { print("\n"); }
898 }
899
900
901 =head2 get_Gene
902
903 Args : none
904 Example : $gene = $member->get_Gene
905 Description: if member is an 'ENSEMBLGENE' returns Bio::EnsEMBL::Gene object
906 by connecting to ensembl genome core database
907 REQUIRES properly setup Registry conf file or
908 manually setting genome_db->db_adaptor for each genome.
909 Returntype : Bio::EnsEMBL::Gene or undef
910 Exceptions : none
911 Caller : general
912
913 =cut
914
915 sub get_Gene {
916 my $self = shift;
917
918 return $self->{'core_gene'} if($self->{'core_gene'});
919
920 unless($self->genome_db and
921 $self->genome_db->db_adaptor and
922 $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'))
923 {
924 throw("unable to connect to core ensembl database: missing registry and genome_db.locator");
925 }
926
927 my $coreDBA = $self->genome_db->db_adaptor;
928 if($self->source_name eq 'ENSEMBLGENE') {
929 $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->stable_id);
930 }
931 if($self->source_name eq 'ENSEMBLPEP') {
932 $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->gene_member->stable_id);
933 }
934 return $self->{'core_gene'};
935 }
936
937 =head2 get_Transcript
938
939 Args : none
940 Example : $transcript = $member->get_Transcript
941 Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Transcript object
942 by connecting to ensembl genome core database
943 REQUIRES properly setup Registry conf file or
944 manually setting genome_db->db_adaptor for each genome.
945 Returntype : Bio::EnsEMBL::Transcript or undef
946 Exceptions : none
947 Caller : general
948
949 =cut
950
951 sub get_Transcript {
952 my $self = shift;
953
954 return undef unless($self->source_name eq 'ENSEMBLPEP');
955 return $self->{'core_transcript'} if($self->{'core_transcript'});
956
957 unless($self->genome_db and
958 $self->genome_db->db_adaptor and
959 $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'))
960 {
961 throw("unable to connect to core ensembl database: missing registry and genome_db.locator");
962 }
963 my $coreDBA = $self->genome_db->db_adaptor;
964 $self->{'core_transcript'} = $coreDBA->get_TranscriptAdaptor->fetch_by_translation_stable_id($self->stable_id);
965 return $self->{'core_transcript'};
966 }
967
968
969 =head2 get_Translation
970
971 Args : none
972 Example : $translation = $member->get_Translation
973 Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Translation object
974 by connecting to ensembl genome core database
975 REQUIRES properly setup Registry conf file or
976 manually setting genome_db->db_adaptor for each genome.
977 Returntype : Bio::EnsEMBL::Gene or undef
978 Exceptions : none
979 Caller : general
980
981 =cut
982
983 sub get_Translation {
984 my $self = shift;
985
986 if($self->get_Transcript) {
987 my $transcript = $self->get_Transcript;
988 return $transcript->translation();
989 }
990 return undef;
991 }
992
993 sub gene {
994 my $self = shift;
995 deprecate('Use get_Gene() instead');
996 return $self->get_Gene;
997 }
998 sub transcript {
999 my $self = shift;
1000 deprecate('Use get_Transcript() instead');
1001 return $self->get_Transcript;
1002 }
1003 sub translation {
1004 my $self = shift;
1005 deprecate('Use get_Translation() instead');
1006 return $self->get_Translation();
1007 }
1008
1009
1010 =head2 get_canonical_Member
1011
1012 Args : none
1013 Example : $canonicalMember = $member->get_canonical_Member
1014 Description: if member is an "ENSEMBLGENE" it will return the canonical peptide / transcript member
1015 if member is an 'ENSEMBLPEP' it will get its gene member and have it
1016 if member is an 'ENSEMBLTRANS' it will get its gene member and have it
1017 return the canonical peptide / transcript (which could be the same as the starting member)
1018 Returntype : Bio::EnsEMBL::Compara::Member or undef
1019 Exceptions : none
1020 Caller : general
1021
1022 =cut
1023
1024 sub get_canonical_Member {
1025 my $self = shift;
1026
1027 return unless($self->adaptor);
1028
1029 my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_canonical_member_for_gene_member_id')
1030 ? $self->adaptor # a MemberAdaptor or derivative
1031 : $self->adaptor->db->get_MemberAdaptor;
1032
1033 if($self->source_name eq 'ENSEMBLGENE') {
1034
1035 return $able_adaptor->fetch_canonical_member_for_gene_member_id($self->dbID);
1036
1037 } elsif(($self->source_name eq 'ENSEMBLPEP') or ($self->source_name eq 'ENSEMBLTRANS')) {
1038
1039 my $geneMember = $self->gene_member or return;
1040
1041 return $able_adaptor->fetch_canonical_member_for_gene_member_id($geneMember->dbID);
1042
1043 } else {
1044
1045 return undef;
1046 }
1047 }
1048
1049
1050 =head2 get_canonical_peptide_Member
1051
1052 Description: DEPRECATED. Use get_canonical_Member() instead
1053
1054 =cut
1055
1056 sub get_canonical_peptide_Member {
1057 my $self = shift;
1058
1059 deprecate('Use get_canonical_Member() instead');
1060 return $self->get_canonical_Member(@_);
1061 }
1062
1063
1064 =head2 get_canonical_transcript_Member
1065
1066 Description: DEPRECATED. Use get_canonical_Member() instead
1067
1068 =cut
1069
1070 sub get_canonical_transcript_Member {
1071 my $self = shift;
1072
1073 deprecate('Use get_canonical_Member() instead');
1074 return $self->get_canonical_Member(@_);
1075 }
1076
1077
1078 =head2 get_all_peptide_Members
1079
1080 Args : none
1081 Example : $pepMembers = $gene_member->get_all_peptide_Members
1082 Description: return listref of all peptide members of this gene_member
1083 Returntype : array ref of Bio::EnsEMBL::Compara::Member
1084 Exceptions : throw if not an ENSEMBLGENE
1085 Caller : general
1086
1087 =cut
1088
1089 sub get_all_peptide_Members {
1090 my $self = shift;
1091
1092 throw("adaptor undefined, can access database") unless($self->adaptor);
1093 throw("not an ENSEMBLGENE member") if($self->source_name ne 'ENSEMBLGENE');
1094
1095 my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_all_peptides_for_gene_member_id')
1096 ? $self->adaptor # a MemberAdaptor or derivative
1097 : $self->adaptor->db->get_MemberAdaptor;
1098
1099
1100 return $able_adaptor->fetch_all_peptides_for_gene_member_id($self->dbID);
1101 }
1102
1103
1104 1;