0
|
1 package Bio::EnsEMBL::Compara::Member;
|
|
2
|
|
3 use strict;
|
|
4 use Bio::Seq;
|
|
5 use Bio::EnsEMBL::Utils::Argument;
|
|
6 use Bio::EnsEMBL::Utils::Exception;
|
|
7 use Bio::EnsEMBL::Gene;
|
|
8 use Bio::EnsEMBL::Compara::GenomeDB;
|
|
9
|
|
10
|
|
11 =head2 new (CONSTRUCTOR)
|
|
12
|
|
13 Arg [-DBID] : (opt)
|
|
14 : int $dbID (the database internal ID for this object)
|
|
15 Arg [-ADAPTOR]
|
|
16 : Bio::EnsEMBL::Compara::DBSQL::Member $adaptor
|
|
17 (the adaptor for connecting to the database)
|
|
18 Arg [-DESCRIPTION] (opt)
|
|
19 : string $description
|
|
20 Arg [-SOURCE_NAME] (opt)
|
|
21 : string $source_name
|
|
22 (e.g., "ENSEMBLGENE", "ENSEMBLPEP", "Uniprot/SWISSPROT", "Uniprot/SPTREMBL")
|
|
23 Arg [-TAXON_ID] (opt)
|
|
24 : int $taxon_id
|
|
25 (NCBI taxonomy id for the member)
|
|
26 Arg [-GENOME_DB_ID] (opt)
|
|
27 : int $genome_db_id
|
|
28 (the $genome_db->dbID for a species in the database)
|
|
29 Arg [-SEQUENCE_ID] (opt)
|
|
30 : int $sequence_id
|
|
31 (the $sequence_id for the sequence table in the database)
|
|
32 Example :
|
|
33 my $member = new Bio::EnsEMBL::Compara::Member;
|
|
34 Description: Creates a new Member object
|
|
35 Returntype : Bio::EnsEMBL::Compara::Member
|
|
36 Exceptions : none
|
|
37 Caller : general
|
|
38 Status : Stable
|
|
39
|
|
40 =cut
|
|
41
|
|
42 sub new {
|
|
43 my ($class, @args) = @_;
|
|
44
|
|
45 my $self = bless {}, $class;
|
|
46
|
|
47 if (scalar @args) {
|
|
48 #do this explicitly.
|
|
49 my ($dbid, $stable_id, $description, $source_name, $adaptor, $taxon_id, $genome_db_id, $sequence_id) = rearrange([qw(DBID STABLE_ID DESCRIPTION SOURCE_NAME ADAPTOR TAXON_ID GENOME_DB_ID SEQUENCE_ID)], @args);
|
|
50
|
|
51 $dbid && $self->dbID($dbid);
|
|
52 $stable_id && $self->stable_id($stable_id);
|
|
53 $description && $self->description($description);
|
|
54 $source_name && $self->source_name($source_name);
|
|
55 $adaptor && $self->adaptor($adaptor);
|
|
56 $taxon_id && $self->taxon_id($taxon_id);
|
|
57 $genome_db_id && $self->genome_db_id($genome_db_id);
|
|
58 $sequence_id && $self->sequence_id($sequence_id);
|
|
59 }
|
|
60
|
|
61 return $self;
|
|
62 }
|
|
63
|
|
64
|
|
65 =head2 copy
|
|
66
|
|
67 Arg [1] : object $parent_object (optional)
|
|
68 Example :
|
|
69 Description: copies the object, optionally by topping up a given structure (to support multiple inheritance)
|
|
70 Returntype :
|
|
71 Exceptions :
|
|
72 Caller :
|
|
73
|
|
74 =cut
|
|
75
|
|
76 sub copy {
|
|
77 my $self = shift;
|
|
78
|
|
79 my $mycopy = @_ ? shift : {};
|
|
80 bless $mycopy, 'Bio::EnsEMBL::Compara::Member';
|
|
81
|
|
82 $mycopy->dbID($self->dbID);
|
|
83 $mycopy->stable_id($self->stable_id);
|
|
84 $mycopy->version($self->version);
|
|
85 $mycopy->description($self->description);
|
|
86 $mycopy->source_name($self->source_name);
|
|
87 #$mycopy->adaptor($self->adaptor);
|
|
88 $mycopy->chr_name($self->chr_name);
|
|
89 $mycopy->chr_start($self->chr_start);
|
|
90 $mycopy->chr_end($self->chr_end);
|
|
91 $mycopy->chr_strand($self->chr_strand);
|
|
92 $mycopy->taxon_id($self->taxon_id);
|
|
93 $mycopy->genome_db_id($self->genome_db_id);
|
|
94 $mycopy->sequence_id($self->sequence_id);
|
|
95 $mycopy->gene_member_id($self->gene_member_id);
|
|
96 $mycopy->display_label($self->display_label);
|
|
97
|
|
98 return $mycopy;
|
|
99 }
|
|
100
|
|
101
|
|
102 =head2 new_fast
|
|
103
|
|
104 Arg [1] : hash reference $hashref
|
|
105 Example : none
|
|
106 Description: This is an ultra fast constructor which requires knowledge of
|
|
107 the objects internals to be used.
|
|
108 Returntype :
|
|
109 Exceptions : none
|
|
110 Caller :
|
|
111
|
|
112 =cut
|
|
113
|
|
114 sub new_fast {
|
|
115 my ($class, $hashref) = @_;
|
|
116
|
|
117 return bless $hashref, $class;
|
|
118 }
|
|
119
|
|
120 =head2 new_from_gene
|
|
121
|
|
122 Args : Requires both an Bio::Ensembl:Gene object and a
|
|
123 : Bio::Ensembl:Compara:GenomeDB object
|
|
124 Example : $member = Bio::EnsEMBL::Compara::Member->new_from_gene(
|
|
125 -gene => $gene,
|
|
126 -genome_db => $genome_db);
|
|
127 Description: contructor method which takes an Ensembl::Gene object
|
|
128 and Compara:GenomeDB object and creates a new Member object
|
|
129 translating from the Gene object
|
|
130 Returntype : Bio::Ensembl::Compara::Member
|
|
131 Exceptions :
|
|
132 Caller :
|
|
133
|
|
134 =cut
|
|
135
|
|
136 sub new_from_gene {
|
|
137 my ($class, @args) = @_;
|
|
138 my $self = $class->new(@args);
|
|
139
|
|
140 if (scalar @args) {
|
|
141
|
|
142 my ($gene, $genome_db) = rearrange([qw(GENE GENOME_DB)], @args);
|
|
143
|
|
144 unless(defined($gene) and $gene->isa('Bio::EnsEMBL::Gene')) {
|
|
145 throw(
|
|
146 "gene arg must be a [Bio::EnsEMBL::Gene] ".
|
|
147 "not a [$gene]");
|
|
148 }
|
|
149 unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
|
|
150 throw(
|
|
151 "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ".
|
|
152 "not a [$genome_db]");
|
|
153 }
|
|
154 unless (defined $gene->stable_id) {
|
|
155 throw("COREDB error: does not contain gene_stable_id for gene_id ". $gene->dbID."\n");
|
|
156 }
|
|
157
|
|
158 $self->stable_id($gene->stable_id);
|
|
159 $self->taxon_id($genome_db->taxon_id);
|
|
160 $self->description($gene->description);
|
|
161 $self->genome_db_id($genome_db->dbID);
|
|
162 $self->chr_name($gene->seq_region_name);
|
|
163 $self->chr_start($gene->seq_region_start);
|
|
164 $self->chr_end($gene->seq_region_end);
|
|
165 $self->chr_strand($gene->seq_region_strand);
|
|
166 $self->source_name("ENSEMBLGENE");
|
|
167 $self->version($gene->version);
|
|
168 }
|
|
169 return $self;
|
|
170 }
|
|
171
|
|
172
|
|
173 =head2 new_from_transcript
|
|
174
|
|
175 Arg[1] : Bio::Ensembl:Transcript object
|
|
176 Arg[2] : Bio::Ensembl:Compara:GenomeDB object
|
|
177 Arg[3] : string where value='translate' causes transcript object to translate
|
|
178 to a peptide
|
|
179 Example : $member = Bio::EnsEMBL::Compara::Member->new_from_transcript(
|
|
180 $transcript, $genome_db,
|
|
181 -translate);
|
|
182 Description: contructor method which takes an Ensembl::Gene object
|
|
183 and Compara:GenomeDB object and creates a new Member object
|
|
184 translating from the Gene object
|
|
185 Returntype : Bio::Ensembl::Compara::Member
|
|
186 Exceptions :
|
|
187 Caller :
|
|
188
|
|
189 =cut
|
|
190
|
|
191 sub new_from_transcript {
|
|
192 my ($class, @args) = @_;
|
|
193 my $self = $class->new(@args);
|
|
194 my $peptideBioSeq;
|
|
195 my $seq_string;
|
|
196
|
|
197 my ($transcript, $genome_db, $translate, $description) = rearrange([qw(TRANSCRIPT GENOME_DB TRANSLATE DESCRIPTION)], @args);
|
|
198 #my ($transcript, $genome_db, $translate) = @args;
|
|
199
|
|
200 unless(defined($transcript) and $transcript->isa('Bio::EnsEMBL::Transcript')) {
|
|
201 throw(
|
|
202 "transcript arg must be a [Bio::EnsEMBL::Transcript]".
|
|
203 "not a [$transcript]");
|
|
204 }
|
|
205 unless(defined($genome_db) and $genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
|
|
206 throw(
|
|
207 "genome_db arg must be a [Bio::EnsEMBL::Compara::GenomeDB] ".
|
|
208 "not a [$genome_db]");
|
|
209 }
|
|
210 $self->taxon_id($genome_db->taxon_id);
|
|
211 if(defined($description)) { $self->description($description); }
|
|
212 else { $self->description("NULL"); }
|
|
213 $self->genome_db_id($genome_db->dbID);
|
|
214 $self->chr_name($transcript->seq_region_name);
|
|
215 $self->chr_start($transcript->coding_region_start);
|
|
216 $self->chr_end($transcript->coding_region_end);
|
|
217 $self->chr_strand($transcript->seq_region_strand);
|
|
218 $self->version($transcript->translation->version) if ($translate eq 'yes');
|
|
219
|
|
220 if(($translate eq 'translate') or ($translate eq 'yes')) {
|
|
221 if(not defined($transcript->translation)) {
|
|
222 throw("request to translate a transcript without a defined translation",
|
|
223 $transcript->stable_id);
|
|
224 }
|
|
225 unless (defined $transcript->translation->stable_id) {
|
|
226 throw("COREDB error: does not contain translation stable id for translation_id ".$transcript->translation->dbID."\n");
|
|
227 }
|
|
228
|
|
229 $self->stable_id($transcript->translation->stable_id);
|
|
230 $self->source_name("ENSEMBLPEP");
|
|
231
|
|
232 unless ($peptideBioSeq = $transcript->translate) {
|
|
233 throw("COREDB error: unable to get a BioSeq translation from ". $transcript->stable_id);
|
|
234 }
|
|
235 eval {
|
|
236 $seq_string = $peptideBioSeq->seq;
|
|
237 };
|
|
238 throw "COREDB error: can't get seq from peptideBioSeq" if $@;
|
|
239 # OR
|
|
240 #$seq_string = $transcript->translation->seq;
|
|
241
|
|
242 if ($seq_string =~ /^X+$/) {
|
|
243 warn("X+ in sequence from translation " . $transcript->translation->stable_id."\n");
|
|
244 }
|
|
245 elsif (length($seq_string) == 0) {
|
|
246 warn("zero length sequence from translation " . $transcript->translation->stable_id."\n");
|
|
247 }
|
|
248 else {
|
|
249 #$seq_string =~ s/(.{72})/$1\n/g;
|
|
250 $self->sequence($seq_string);
|
|
251 }
|
|
252 } elsif ($translate eq 'ncrna') {
|
|
253 unless (defined $transcript->stable_id) {
|
|
254 throw("COREDB error: does not contain transcript stable id for transcript_id ".$transcript->dbID."\n");
|
|
255 }
|
|
256 $self->stable_id($transcript->stable_id);
|
|
257 $self->source_name("ENSEMBLTRANS");
|
|
258
|
|
259 unless ($seq_string = $transcript->spliced_seq) {
|
|
260 throw("COREDB error: unable to get a BioSeq spliced_seq from ". $transcript->stable_id);
|
|
261 }
|
|
262 if (length($seq_string) == 0) {
|
|
263 warn("zero length sequence from transcript " . $transcript->stable_id."\n");
|
|
264 }
|
|
265 $self->sequence($seq_string);
|
|
266 }
|
|
267
|
|
268 #print("Member->new_from_transcript\n");
|
|
269 #print(" source_name = '" . $self->source_name . "'\n");
|
|
270 #print(" stable_id = '" . $self->stable_id . "'\n");
|
|
271 #print(" taxon_id = '" . $self->taxon_id . "'\n");
|
|
272 #print(" chr_name = '" . $self->chr_name . "'\n");
|
|
273 return $self;
|
|
274 }
|
|
275
|
|
276
|
|
277 =head2 member_id
|
|
278
|
|
279 Arg [1] : int $member_id (optional)
|
|
280 Example :
|
|
281 Description:
|
|
282 Returntype :
|
|
283 Exceptions :
|
|
284 Caller :
|
|
285
|
|
286 =cut
|
|
287
|
|
288 sub member_id {
|
|
289 my $self = shift;
|
|
290 return $self->dbID(@_);
|
|
291 }
|
|
292
|
|
293
|
|
294 =head2 dbID
|
|
295
|
|
296 Arg [1] : int $dbID (optional)
|
|
297 Example :
|
|
298 Description:
|
|
299 Returntype :
|
|
300 Exceptions :
|
|
301 Caller :
|
|
302
|
|
303 =cut
|
|
304
|
|
305 sub dbID {
|
|
306 my $self = shift;
|
|
307 $self->{'_dbID'} = shift if(@_);
|
|
308 return $self->{'_dbID'};
|
|
309 }
|
|
310
|
|
311 =head2 stable_id
|
|
312
|
|
313 Arg [1] : string $stable_id (optional)
|
|
314 Example :
|
|
315 Description:
|
|
316 Returntype :
|
|
317 Exceptions :
|
|
318 Caller :
|
|
319
|
|
320 =cut
|
|
321
|
|
322 sub stable_id {
|
|
323 my $self = shift;
|
|
324 $self->{'_stable_id'} = shift if(@_);
|
|
325 return $self->{'_stable_id'};
|
|
326 }
|
|
327
|
|
328 =head2 display_label
|
|
329
|
|
330 Arg [1] : string $display_label (optional)
|
|
331 Example :
|
|
332 Description:
|
|
333 Returntype :
|
|
334 Exceptions :
|
|
335 Caller :
|
|
336
|
|
337 =cut
|
|
338
|
|
339 sub display_label {
|
|
340 my $self = shift;
|
|
341 $self->{'_display_label'} = shift if(@_);
|
|
342 return $self->{'_display_label'};
|
|
343 }
|
|
344
|
|
345 =head2 version
|
|
346
|
|
347 Arg [1] :
|
|
348 Example :
|
|
349 Description:
|
|
350 Returntype :
|
|
351 Exceptions :
|
|
352 Caller :
|
|
353
|
|
354 =cut
|
|
355
|
|
356 sub version {
|
|
357 my $self = shift;
|
|
358 $self->{'_version'} = shift if(@_);
|
|
359 $self->{'_version'} = 0 unless(defined($self->{'_version'}));
|
|
360 return $self->{'_version'};
|
|
361 }
|
|
362
|
|
363 =head2 description
|
|
364
|
|
365 Arg [1] : string $description (optional)
|
|
366 Example :
|
|
367 Description:
|
|
368 Returntype : string
|
|
369 Exceptions :
|
|
370 Caller :
|
|
371
|
|
372 =cut
|
|
373
|
|
374 sub description {
|
|
375 my $self = shift;
|
|
376 $self->{'_description'} = shift if(@_);
|
|
377 return $self->{'_description'};
|
|
378 }
|
|
379
|
|
380 =head2 source_name
|
|
381
|
|
382 =cut
|
|
383
|
|
384 sub source_name {
|
|
385 my $self = shift;
|
|
386 $self->{'_source_name'} = shift if (@_);
|
|
387 return $self->{'_source_name'};
|
|
388 }
|
|
389
|
|
390 =head2 adaptor
|
|
391
|
|
392 Arg [1] : string $adaptor (optional)
|
|
393 corresponding to a perl module
|
|
394 Example :
|
|
395 Description:
|
|
396 Returntype :
|
|
397 Exceptions :
|
|
398 Caller :
|
|
399
|
|
400 =cut
|
|
401
|
|
402 sub adaptor {
|
|
403 my $self = shift;
|
|
404 $self->{'_adaptor'} = shift if(@_);
|
|
405 return $self->{'_adaptor'};
|
|
406 }
|
|
407
|
|
408 =head2 chr_name
|
|
409
|
|
410 =cut
|
|
411
|
|
412 sub chr_name {
|
|
413 my $self = shift;
|
|
414 $self->{'_chr_name'} = shift if (@_);
|
|
415 return $self->{'_chr_name'};
|
|
416 }
|
|
417
|
|
418 =head2 chr_start
|
|
419
|
|
420 =cut
|
|
421
|
|
422 sub chr_start {
|
|
423 my $self = shift;
|
|
424 $self->{'_chr_start'} = shift if (@_);
|
|
425 return $self->{'_chr_start'};
|
|
426 }
|
|
427
|
|
428 =head2 chr_end
|
|
429
|
|
430 =cut
|
|
431
|
|
432 sub chr_end {
|
|
433 my $self = shift;
|
|
434 $self->{'_chr_end'} = shift if (@_);
|
|
435 return $self->{'_chr_end'};
|
|
436 }
|
|
437
|
|
438 =head2 chr_strand
|
|
439
|
|
440 Arg [1] : integer
|
|
441 Description: Returns the strand of the member. Defined strands are 1 or -1.
|
|
442 0 is undefined strand.
|
|
443 Returntype : 1,0,-1
|
|
444 Exceptions : none
|
|
445 Caller : general
|
|
446
|
|
447 =cut
|
|
448
|
|
449 sub chr_strand {
|
|
450 my $self = shift;
|
|
451 $self->{'_chr_strand'} = shift if (@_);
|
|
452 $self->{'_chr_strand'}='0' unless(defined($self->{'_chr_strand'}));
|
|
453 return $self->{'_chr_strand'};
|
|
454 }
|
|
455
|
|
456 =head2 taxon_id
|
|
457
|
|
458 =cut
|
|
459
|
|
460 sub taxon_id {
|
|
461 my $self = shift;
|
|
462 $self->{'_taxon_id'} = shift if (@_);
|
|
463 return $self->{'_taxon_id'};
|
|
464 }
|
|
465
|
|
466 =head2 taxon
|
|
467
|
|
468 =cut
|
|
469
|
|
470 sub taxon {
|
|
471 my $self = shift;
|
|
472
|
|
473 if (@_) {
|
|
474 my $taxon = shift;
|
|
475 unless ($taxon->isa('Bio::EnsEMBL::Compara::NCBITaxon')) {
|
|
476 throw(
|
|
477 "taxon arg must be a [Bio::EnsEMBL::Compara::NCBITaxon".
|
|
478 "not a [$taxon]");
|
|
479 }
|
|
480 $self->{'_taxon'} = $taxon;
|
|
481 $self->taxon_id($taxon->ncbi_taxid);
|
|
482 } else {
|
|
483 unless (defined $self->{'_taxon'}) {
|
|
484 unless (defined $self->taxon_id) {
|
|
485 throw("can't fetch Taxon without a taxon_id");
|
|
486 }
|
|
487 my $NCBITaxonAdaptor = $self->adaptor->db->get_NCBITaxonAdaptor;
|
|
488 $self->{'_taxon'} = $NCBITaxonAdaptor->fetch_node_by_taxon_id($self->taxon_id);
|
|
489 }
|
|
490 }
|
|
491
|
|
492 return $self->{'_taxon'};
|
|
493 }
|
|
494
|
|
495 =head2 genome_db_id
|
|
496
|
|
497 =cut
|
|
498
|
|
499 sub genome_db_id {
|
|
500 my $self = shift;
|
|
501 $self->{'_genome_db_id'} = shift if (@_);
|
|
502 return $self->{'_genome_db_id'};
|
|
503 }
|
|
504
|
|
505 =head2 genome_db
|
|
506
|
|
507 =cut
|
|
508
|
|
509 sub genome_db {
|
|
510 my $self = shift;
|
|
511
|
|
512 if (@_) {
|
|
513 my $genome_db = shift;
|
|
514 unless ($genome_db->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
|
|
515 throw(
|
|
516 "arg must be a [Bio::EnsEMBL::Compara::GenomeDB".
|
|
517 "not a [$genome_db]");
|
|
518 }
|
|
519 $self->{'_genome_db'} = $genome_db;
|
|
520 $self->genome_db_id($genome_db->dbID);
|
|
521 } else {
|
|
522 unless (defined $self->{'_genome_db'}) {
|
|
523 unless (defined $self->genome_db_id and defined $self->adaptor) {
|
|
524 throw("can't fetch GenomeDB without an adaptor and genome_db_id");
|
|
525 }
|
|
526 my $GenomeDBAdaptor = $self->adaptor->db->get_GenomeDBAdaptor;
|
|
527 $self->{'_genome_db'} = $GenomeDBAdaptor->fetch_by_dbID($self->genome_db_id);
|
|
528 }
|
|
529 }
|
|
530
|
|
531 return $self->{'_genome_db'};
|
|
532 }
|
|
533
|
|
534 =head2 sequence
|
|
535
|
|
536 Arg [1] : string $sequence
|
|
537 Example : my $seq = $member->sequence;
|
|
538 Description: Get/set the sequence string of this member
|
|
539 Will lazy load by sequence_id if needed and able
|
|
540 Returntype : string
|
|
541 Exceptions : none
|
|
542 Caller : general
|
|
543
|
|
544 =cut
|
|
545
|
|
546 sub sequence {
|
|
547 my $self = shift;
|
|
548
|
|
549 if(@_) {
|
|
550 $self->{'_seq_length'} = undef;
|
|
551 $self->{'_sequence'} = shift;
|
|
552 $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'}));
|
|
553 return $self->{'_sequence'};
|
|
554 }
|
|
555
|
|
556 if(!defined($self->{'_sequence'}) and
|
|
557 defined($self->sequence_id()) and
|
|
558 defined($self->adaptor))
|
|
559 {
|
|
560 $self->{'_sequence'} = $self->adaptor->db->get_SequenceAdaptor->fetch_by_dbID($self->sequence_id);
|
|
561 $self->{'_seq_length'} = length($self->{'_sequence'}) if(defined($self->{'_sequence'}));
|
|
562 }
|
|
563
|
|
564 return $self->{'_sequence'};
|
|
565 }
|
|
566
|
|
567 =head2 sequence_exon_cased
|
|
568
|
|
569 Args : none
|
|
570 Example : my $sequence_exon_cased = $member->sequence_exon_cased;
|
|
571
|
|
572 Description: Get/set the sequence string of this peptide member with
|
|
573 alternating upper and lower case corresponding to the translateable exons.
|
|
574 Returntype : string
|
|
575 Exceptions : none
|
|
576 Caller : general
|
|
577
|
|
578 =cut
|
|
579
|
|
580 sub sequence_exon_cased {
|
|
581 my $self = shift;
|
|
582
|
|
583 my $sequence = $self->sequence;
|
|
584 my $trans = $self->get_Transcript;
|
|
585 my @exons = @{$trans->get_all_translateable_Exons};
|
|
586 return $sequence if (1 == scalar @exons);
|
|
587
|
|
588 my %splice_site;
|
|
589 my $pep_len = 0;
|
|
590 my $overlap_len = 0;
|
|
591 while (my $exon = shift @exons) {
|
|
592 my $exon_len = $exon->length;
|
|
593 my $pep_seq = $exon->peptide($trans)->length;
|
|
594 # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq
|
|
595 $pep_seq -= 1 if ($overlap_len);
|
|
596 $pep_len += $pep_seq;
|
|
597 if ($overlap_len = (($exon_len + $overlap_len ) %3)){ # if there is an overlap
|
|
598 $splice_site{$pep_len-1}{'overlap'} = $pep_len -1; # stores overlapping aa-exon boundary
|
|
599 } else {
|
|
600 $overlap_len = 0;
|
|
601 }
|
|
602 $splice_site{$pep_len}{'phase'} = $overlap_len; # positions of exon boundary
|
|
603 }
|
|
604
|
|
605 my @exon_sequences = ();
|
|
606 foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end
|
|
607 next if (defined($splice_site{$pep_len}{'overlap'}));
|
|
608 next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein
|
|
609 my $length = $pep_len;
|
|
610 $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'});
|
|
611 my $peptide;
|
|
612 $peptide = substr($sequence,$length,length($sequence),'');
|
|
613 unshift(@exon_sequences, $peptide);
|
|
614 }
|
|
615 unshift(@exon_sequences, $sequence); # First exon (last piece of sequence left)
|
|
616
|
|
617 my $splice = 1;
|
|
618 foreach my $exon_sequence (@exon_sequences) {
|
|
619 if ($splice % 2 == 0) {
|
|
620 $exon_sequence = lc($exon_sequence);
|
|
621 }
|
|
622 $splice++;
|
|
623 }
|
|
624 my $seqsplice = join("", @exon_sequences);
|
|
625
|
|
626 return $seqsplice;
|
|
627 }
|
|
628
|
|
629 sub sequence_exon_bounded {
|
|
630 my $self = shift;
|
|
631
|
|
632 if(@_) {
|
|
633 $self->{'_sequence_exon_bounded'} = shift;
|
|
634 return $self->{'_sequence_exon_bounded'};
|
|
635 }
|
|
636
|
|
637 if(!defined($self->{'_sequence_exon_bounded'})) {
|
|
638 $self->{'_sequence_exon_bounded'} = $self->adaptor->db->get_SequenceAdaptor->fetch_sequence_exon_bounded_by_member_id($self->member_id);
|
|
639 }
|
|
640
|
|
641 if(!defined($self->{'_sequence_exon_bounded'})) {
|
|
642 $self->{'_sequence_exon_bounded'} = $self->_compose_sequence_exon_bounded;
|
|
643 }
|
|
644
|
|
645 return $self->{'_sequence_exon_bounded'};
|
|
646 }
|
|
647
|
|
648
|
|
649 sub _compose_sequence_exon_bounded {
|
|
650 my $self = shift;
|
|
651
|
|
652 my $sequence = $self->sequence;
|
|
653 my $trans = $self->get_Transcript;
|
|
654 my @exons = @{$trans->get_all_translateable_Exons};
|
|
655 return $sequence if (1 == scalar @exons);
|
|
656
|
|
657 my %splice_site;
|
|
658 my $pep_len = 0;
|
|
659 my $overlap_len = 0;
|
|
660 while (my $exon = shift @exons) {
|
|
661 my $exon_len = $exon->length;
|
|
662 my $pep_seq = $exon->peptide($trans)->length;
|
|
663 # remove the first char of seq if overlap ($exon->peptide()) return full overlapping exon seq
|
|
664 $pep_seq -= 1 if ($overlap_len);
|
|
665 $pep_len += $pep_seq;
|
|
666 if ($overlap_len = (($exon_len + $overlap_len ) %3)){ # if there is an overlap
|
|
667 $splice_site{$pep_len-1}{'overlap'} = $pep_len -1; # stores overlapping aa-exon boundary
|
|
668 } else {
|
|
669 $overlap_len = 0;
|
|
670 }
|
|
671 $splice_site{$pep_len}{'phase'} = $overlap_len; # positions of exon boundary
|
|
672 }
|
|
673
|
|
674 my $seqsplice = '';
|
|
675 foreach my $pep_len (sort {$b<=>$a} keys %splice_site) { # We start from the end
|
|
676 next if (defined($splice_site{$pep_len}{'overlap'}));
|
|
677 next if ($pep_len > length($sequence)); # Get rid of 1 codon STOP exons in the protein
|
|
678 my $length = $pep_len;
|
|
679 $length-- if (defined($splice_site{$pep_len}{'phase'}) && 1 == $splice_site{$pep_len}{'phase'});
|
|
680 my $peptide;
|
|
681 $peptide = substr($sequence,$length,length($sequence),'');
|
|
682 $seqsplice = $peptide . $seqsplice;
|
|
683 $seqsplice = 'o' . $seqsplice if (0 == $splice_site{$pep_len}{'phase'});
|
|
684 $seqsplice = 'b' . $seqsplice if (1 == $splice_site{$pep_len}{'phase'});
|
|
685 $seqsplice = 'j' . $seqsplice if (2 == $splice_site{$pep_len}{'phase'});
|
|
686 }
|
|
687 $seqsplice = $sequence . $seqsplice; # First exon AS IS
|
|
688
|
|
689 return $seqsplice;
|
|
690 }
|
|
691
|
|
692 sub sequence_cds {
|
|
693 my $self = shift;
|
|
694
|
|
695 if(@_) {
|
|
696 $self->{'_sequence_cds'} = shift;
|
|
697 return $self->{'_sequence_cds'};
|
|
698 }
|
|
699
|
|
700 if(!defined($self->{'_sequence_cds'})) {
|
|
701 $self->{'_sequence_cds'} = $self->adaptor->db->get_SequenceAdaptor->fetch_sequence_cds_by_member_id($self->member_id);
|
|
702 }
|
|
703
|
|
704 if(!defined($self->{'_sequence_cds'})) {
|
|
705 $self->{'_sequence_cds'} = $self->get_Transcript->translateable_seq;
|
|
706 }
|
|
707
|
|
708 return $self->{'_sequence_cds'};
|
|
709 }
|
|
710
|
|
711 # GJ 2008-11-17
|
|
712 # Returns the amino acid sequence with exon boundaries denoted as O, B, or J depending on the phase (O=0, B=1, J=2)
|
|
713 sub get_exon_bounded_sequence {
|
|
714 my $self = shift;
|
|
715 my $numbers = shift;
|
|
716 my $transcript = $self->get_Transcript;
|
|
717
|
|
718 # The get_all_translateable_exons creates a list of reformatted "translateable" exon sequences.
|
|
719 # When the exon phase is 1 or 2, there will be duplicated residues at the end and start of exons.
|
|
720 # We'll deal with this during the exon loop.
|
|
721 my @exons = @{$transcript->get_all_translateable_Exons};
|
|
722 my $seq_string = "";
|
|
723 # for my $ex (@exons) {
|
|
724 while (my $ex = shift @exons) {
|
|
725 my $seq = $ex->peptide($transcript)->seq;
|
|
726
|
|
727 # PHASE HANDLING
|
|
728 my $phase = $ex->phase;
|
|
729 my $end_phase = $ex->end_phase;
|
|
730
|
|
731 # First, cut off repeated end residues.
|
|
732 if ($end_phase == 1 && 0 < scalar @exons) {
|
|
733 # We only own 1/3, so drop the last residue.
|
|
734 $seq = substr($seq,0,-1);
|
|
735 }
|
|
736
|
|
737 # Now cut off repeated start residues.
|
|
738 if ($phase == 2) {
|
|
739 # We only own 1/3, so drop the first residue.
|
|
740 $seq = substr($seq, 1);
|
|
741 }
|
|
742
|
|
743 if ($end_phase > -1) {
|
|
744 $seq = $seq . 'o' if ($end_phase == 0);
|
|
745 $seq = $seq . 'b' if ($end_phase == 1);
|
|
746 $seq = $seq . 'j' if ($end_phase == 2);
|
|
747 }
|
|
748 #print "Start_phase: $phase End_phase: $end_phase\t$seq\n";
|
|
749 $seq_string .= $seq;
|
|
750 }
|
|
751 if (defined $numbers) {
|
|
752 $seq_string =~ s/o/0/g; $seq_string =~ s/b/1/g; $seq_string =~ s/j/2/g;
|
|
753 }
|
|
754 return $seq_string;
|
|
755 }
|
|
756
|
|
757 =head2 seq_length
|
|
758
|
|
759 Example : my $seq_length = $member->seq_length;
|
|
760 Description: get the sequence length of this member
|
|
761 Returntype : int
|
|
762 Exceptions : none
|
|
763 Caller : general
|
|
764
|
|
765 =cut
|
|
766
|
|
767 sub seq_length {
|
|
768 my $self = shift;
|
|
769
|
|
770 unless(defined($self->{'_seq_length'})) {
|
|
771 #need to check case if user is calling seq_length first
|
|
772 #call $self->sequence (to lazy load if needed)
|
|
773 my $seq = $self->sequence;
|
|
774 $self->{'_seq_length'} = length($seq) if(defined($seq));
|
|
775 }
|
|
776 return $self->{'_seq_length'};
|
|
777 }
|
|
778
|
|
779
|
|
780 =head2 sequence_id
|
|
781
|
|
782 Arg [1] : int $sequence_id
|
|
783 Example : my $sequence_id = $member->sequence_id;
|
|
784 Description: Extracts the sequence_id of this member
|
|
785 Returntype : int
|
|
786 Exceptions : none
|
|
787 Caller : general
|
|
788
|
|
789 =cut
|
|
790
|
|
791 sub sequence_id {
|
|
792 my $self = shift;
|
|
793 $self->{'_sequence_id'} = shift if(@_);
|
|
794 if(!defined($self->{'_sequence_id'})) { $self->{'_sequence_id'}=0; }
|
|
795 return $self->{'_sequence_id'};
|
|
796 }
|
|
797
|
|
798 =head2 gene_member_id
|
|
799
|
|
800 Arg [1] : int $gene_member_id
|
|
801 Example : my $gene_member_id = $member->gene_member_id;
|
|
802 Description: Gene_member_id of this protein member
|
|
803 Returntype : int
|
|
804 Exceptions : none
|
|
805 Caller : general
|
|
806
|
|
807 =cut
|
|
808
|
|
809 sub gene_member_id {
|
|
810 my $self = shift;
|
|
811 $self->{'_gene_member_id'} = shift if(@_);
|
|
812 return $self->{'_gene_member_id'};
|
|
813 }
|
|
814
|
|
815
|
|
816 =head2 bioseq
|
|
817
|
|
818 Args : none
|
|
819 Example : my $primaryseq = $member->primaryseq;
|
|
820 Description: returns sequence this member as a Bio::Seq object
|
|
821 Returntype : Bio::Seq object
|
|
822 Exceptions : none
|
|
823 Caller : general
|
|
824
|
|
825 =cut
|
|
826
|
|
827 sub bioseq {
|
|
828 my $self = shift;
|
|
829
|
|
830 throw("Member stable_id undefined") unless defined($self->stable_id());
|
|
831 throw("No sequence for member " . $self->stable_id()) unless defined($self->sequence());
|
|
832
|
|
833 my $seqname;
|
|
834 if (defined($self->genome_db_id) and defined($self->dbID)) {
|
|
835 $seqname = "IDs:" . $self->genome_db_id . ":" . $self->dbID . " " .
|
|
836 $self->source_name . ":" . $self->stable_id;
|
|
837 } else {
|
|
838 $seqname = $self->source_name . ":" . $self->stable_id;
|
|
839 }
|
|
840 my $seq = Bio::Seq->new(-seq => $self->sequence(),
|
|
841 -primary_id => "member_id_".$self->dbID,
|
|
842 -display_id => "member_id_".$self->dbID,
|
|
843 -desc => $seqname ."|". $self->description(),
|
|
844 );
|
|
845 return $seq;
|
|
846 }
|
|
847
|
|
848 =head2 gene_member
|
|
849
|
|
850 Arg[1] : Bio::EnsEMBL::Compara::Member $geneMember (optional)
|
|
851 Example : my $gene_member = $member->gene_member;
|
|
852 Description: returns gene member object for this protein member
|
|
853 Returntype : Bio::EnsEMBL::Compara::Member object
|
|
854 Exceptions : if arg[0] is not a Bio::EnsEMBL::Compara::Member object
|
|
855 Caller : MemberAdaptor(set), general
|
|
856
|
|
857 =cut
|
|
858
|
|
859 sub gene_member {
|
|
860 my $self = shift;
|
|
861 my $gene_member = shift;
|
|
862
|
|
863 if ($gene_member) {
|
|
864 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$gene_member]")
|
|
865 unless ($gene_member->isa('Bio::EnsEMBL::Compara::Member'));
|
|
866 $self->{'_gene_member'} = $gene_member;
|
|
867 }
|
|
868 if(!defined($self->{'_gene_member'}) and
|
|
869 defined($self->adaptor) and $self->dbID)
|
|
870 {
|
|
871 $self->{'_gene_member'} = $self->adaptor->db->get_MemberAdaptor->fetch_gene_for_peptide_member_id($self->dbID);
|
|
872 }
|
|
873 return $self->{'_gene_member'};
|
|
874 }
|
|
875
|
|
876 =head2 print_member
|
|
877
|
|
878 Arg[1] : string to be prrinted instead of "\n"
|
|
879 Example : $member->print_member("BRH");
|
|
880 Description: used for debugging, prints out key descriptive elements
|
|
881 of member
|
|
882 Returntype : none
|
|
883 Exceptions : none
|
|
884 Caller : general
|
|
885
|
|
886 =cut
|
|
887
|
|
888 sub print_member
|
|
889
|
|
890 {
|
|
891 my $self = shift;
|
|
892 my $postfix = shift;
|
|
893
|
|
894 printf(" %s %s(%d)\t%s : %d-%d",$self->source_name, $self->stable_id,
|
|
895 $self->dbID,$self->chr_name,$self->chr_start, $self->chr_end);
|
|
896 if($postfix) { print(" $postfix"); }
|
|
897 else { print("\n"); }
|
|
898 }
|
|
899
|
|
900
|
|
901 =head2 get_Gene
|
|
902
|
|
903 Args : none
|
|
904 Example : $gene = $member->get_Gene
|
|
905 Description: if member is an 'ENSEMBLGENE' returns Bio::EnsEMBL::Gene object
|
|
906 by connecting to ensembl genome core database
|
|
907 REQUIRES properly setup Registry conf file or
|
|
908 manually setting genome_db->db_adaptor for each genome.
|
|
909 Returntype : Bio::EnsEMBL::Gene or undef
|
|
910 Exceptions : none
|
|
911 Caller : general
|
|
912
|
|
913 =cut
|
|
914
|
|
915 sub get_Gene {
|
|
916 my $self = shift;
|
|
917
|
|
918 return $self->{'core_gene'} if($self->{'core_gene'});
|
|
919
|
|
920 unless($self->genome_db and
|
|
921 $self->genome_db->db_adaptor and
|
|
922 $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'))
|
|
923 {
|
|
924 throw("unable to connect to core ensembl database: missing registry and genome_db.locator");
|
|
925 }
|
|
926
|
|
927 my $coreDBA = $self->genome_db->db_adaptor;
|
|
928 if($self->source_name eq 'ENSEMBLGENE') {
|
|
929 $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->stable_id);
|
|
930 }
|
|
931 if($self->source_name eq 'ENSEMBLPEP') {
|
|
932 $self->{'core_gene'} = $coreDBA->get_GeneAdaptor->fetch_by_stable_id($self->gene_member->stable_id);
|
|
933 }
|
|
934 return $self->{'core_gene'};
|
|
935 }
|
|
936
|
|
937 =head2 get_Transcript
|
|
938
|
|
939 Args : none
|
|
940 Example : $transcript = $member->get_Transcript
|
|
941 Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Transcript object
|
|
942 by connecting to ensembl genome core database
|
|
943 REQUIRES properly setup Registry conf file or
|
|
944 manually setting genome_db->db_adaptor for each genome.
|
|
945 Returntype : Bio::EnsEMBL::Transcript or undef
|
|
946 Exceptions : none
|
|
947 Caller : general
|
|
948
|
|
949 =cut
|
|
950
|
|
951 sub get_Transcript {
|
|
952 my $self = shift;
|
|
953
|
|
954 return undef unless($self->source_name eq 'ENSEMBLPEP');
|
|
955 return $self->{'core_transcript'} if($self->{'core_transcript'});
|
|
956
|
|
957 unless($self->genome_db and
|
|
958 $self->genome_db->db_adaptor and
|
|
959 $self->genome_db->db_adaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'))
|
|
960 {
|
|
961 throw("unable to connect to core ensembl database: missing registry and genome_db.locator");
|
|
962 }
|
|
963 my $coreDBA = $self->genome_db->db_adaptor;
|
|
964 $self->{'core_transcript'} = $coreDBA->get_TranscriptAdaptor->fetch_by_translation_stable_id($self->stable_id);
|
|
965 return $self->{'core_transcript'};
|
|
966 }
|
|
967
|
|
968
|
|
969 =head2 get_Translation
|
|
970
|
|
971 Args : none
|
|
972 Example : $translation = $member->get_Translation
|
|
973 Description: if member is an 'ENSEMBLPEP' returns Bio::EnsEMBL::Translation object
|
|
974 by connecting to ensembl genome core database
|
|
975 REQUIRES properly setup Registry conf file or
|
|
976 manually setting genome_db->db_adaptor for each genome.
|
|
977 Returntype : Bio::EnsEMBL::Gene or undef
|
|
978 Exceptions : none
|
|
979 Caller : general
|
|
980
|
|
981 =cut
|
|
982
|
|
983 sub get_Translation {
|
|
984 my $self = shift;
|
|
985
|
|
986 if($self->get_Transcript) {
|
|
987 my $transcript = $self->get_Transcript;
|
|
988 return $transcript->translation();
|
|
989 }
|
|
990 return undef;
|
|
991 }
|
|
992
|
|
993 sub gene {
|
|
994 my $self = shift;
|
|
995 deprecate('Use get_Gene() instead');
|
|
996 return $self->get_Gene;
|
|
997 }
|
|
998 sub transcript {
|
|
999 my $self = shift;
|
|
1000 deprecate('Use get_Transcript() instead');
|
|
1001 return $self->get_Transcript;
|
|
1002 }
|
|
1003 sub translation {
|
|
1004 my $self = shift;
|
|
1005 deprecate('Use get_Translation() instead');
|
|
1006 return $self->get_Translation();
|
|
1007 }
|
|
1008
|
|
1009
|
|
1010 =head2 get_canonical_Member
|
|
1011
|
|
1012 Args : none
|
|
1013 Example : $canonicalMember = $member->get_canonical_Member
|
|
1014 Description: if member is an "ENSEMBLGENE" it will return the canonical peptide / transcript member
|
|
1015 if member is an 'ENSEMBLPEP' it will get its gene member and have it
|
|
1016 if member is an 'ENSEMBLTRANS' it will get its gene member and have it
|
|
1017 return the canonical peptide / transcript (which could be the same as the starting member)
|
|
1018 Returntype : Bio::EnsEMBL::Compara::Member or undef
|
|
1019 Exceptions : none
|
|
1020 Caller : general
|
|
1021
|
|
1022 =cut
|
|
1023
|
|
1024 sub get_canonical_Member {
|
|
1025 my $self = shift;
|
|
1026
|
|
1027 return unless($self->adaptor);
|
|
1028
|
|
1029 my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_canonical_member_for_gene_member_id')
|
|
1030 ? $self->adaptor # a MemberAdaptor or derivative
|
|
1031 : $self->adaptor->db->get_MemberAdaptor;
|
|
1032
|
|
1033 if($self->source_name eq 'ENSEMBLGENE') {
|
|
1034
|
|
1035 return $able_adaptor->fetch_canonical_member_for_gene_member_id($self->dbID);
|
|
1036
|
|
1037 } elsif(($self->source_name eq 'ENSEMBLPEP') or ($self->source_name eq 'ENSEMBLTRANS')) {
|
|
1038
|
|
1039 my $geneMember = $self->gene_member or return;
|
|
1040
|
|
1041 return $able_adaptor->fetch_canonical_member_for_gene_member_id($geneMember->dbID);
|
|
1042
|
|
1043 } else {
|
|
1044
|
|
1045 return undef;
|
|
1046 }
|
|
1047 }
|
|
1048
|
|
1049
|
|
1050 =head2 get_canonical_peptide_Member
|
|
1051
|
|
1052 Description: DEPRECATED. Use get_canonical_Member() instead
|
|
1053
|
|
1054 =cut
|
|
1055
|
|
1056 sub get_canonical_peptide_Member {
|
|
1057 my $self = shift;
|
|
1058
|
|
1059 deprecate('Use get_canonical_Member() instead');
|
|
1060 return $self->get_canonical_Member(@_);
|
|
1061 }
|
|
1062
|
|
1063
|
|
1064 =head2 get_canonical_transcript_Member
|
|
1065
|
|
1066 Description: DEPRECATED. Use get_canonical_Member() instead
|
|
1067
|
|
1068 =cut
|
|
1069
|
|
1070 sub get_canonical_transcript_Member {
|
|
1071 my $self = shift;
|
|
1072
|
|
1073 deprecate('Use get_canonical_Member() instead');
|
|
1074 return $self->get_canonical_Member(@_);
|
|
1075 }
|
|
1076
|
|
1077
|
|
1078 =head2 get_all_peptide_Members
|
|
1079
|
|
1080 Args : none
|
|
1081 Example : $pepMembers = $gene_member->get_all_peptide_Members
|
|
1082 Description: return listref of all peptide members of this gene_member
|
|
1083 Returntype : array ref of Bio::EnsEMBL::Compara::Member
|
|
1084 Exceptions : throw if not an ENSEMBLGENE
|
|
1085 Caller : general
|
|
1086
|
|
1087 =cut
|
|
1088
|
|
1089 sub get_all_peptide_Members {
|
|
1090 my $self = shift;
|
|
1091
|
|
1092 throw("adaptor undefined, can access database") unless($self->adaptor);
|
|
1093 throw("not an ENSEMBLGENE member") if($self->source_name ne 'ENSEMBLGENE');
|
|
1094
|
|
1095 my $able_adaptor = UNIVERSAL::can($self->adaptor, 'fetch_all_peptides_for_gene_member_id')
|
|
1096 ? $self->adaptor # a MemberAdaptor or derivative
|
|
1097 : $self->adaptor->db->get_MemberAdaptor;
|
|
1098
|
|
1099
|
|
1100 return $able_adaptor->fetch_all_peptides_for_gene_member_id($self->dbID);
|
|
1101 }
|
|
1102
|
|
1103
|
|
1104 1;
|