comparison variant_effect_predictor/Bio/EnsEMBL/Compara/GenomicAlignTree.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =head1 NAME
20
21 Bio::EnsEMBL::Compara::GenomicAlignTree
22
23 =head1 SYNOPSIS
24
25 =head1 DESCRIPTION
26
27 Specific subclass of NestedSet to add functionality when the nodes of this tree
28 are GenomicAlign objects and the tree is a representation of a Protein derived
29 Phylogenetic tree
30
31 =head1 APPENDIX
32
33 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
34
35 =cut
36
37
38 package Bio::EnsEMBL::Compara::GenomicAlignTree;
39
40 use strict;
41 use Bio::EnsEMBL::Utils::Argument;
42 use Bio::EnsEMBL::Utils::Exception;
43 use Bio::SimpleAlign;
44 use IO::File;
45
46 use Bio::EnsEMBL::Compara::NestedSet;
47 use Bio::EnsEMBL::Compara::BaseGenomicAlignSet;
48
49 our @ISA = qw(Bio::EnsEMBL::Compara::NestedSet Bio::EnsEMBL::Compara::BaseGenomicAlignSet);
50
51
52 =head2 left_node_id
53
54 Arg [1] : (optional) $left_node_id
55 Example : $object->left_node_id($left_node_id);
56 Example : $left_node_id = $object->left_node_id();
57 Description : Getter/setter for the left_node_id attribute
58 Returntype : integer
59 Exceptions : none
60 Caller : general
61 Status : Stable
62
63 =cut
64
65 sub left_node_id {
66 my $self = shift;
67 if (@_) {
68 $self->{_left_node_id} = shift;
69 }
70 return $self->{_left_node_id};
71 }
72
73
74 =head2 right_node_id
75
76 Arg [1] : (optional) $right_node_id
77 Example : $object->right_node_id($right_node_id);
78 Example : $right_node_id = $object->right_node_id();
79 Description : Getter/setter for the right_node_id attribute
80 Returntype : integer
81 Exceptions : none
82 Caller : general
83 Status : Stable
84
85 =cut
86
87 sub right_node_id {
88 my $self = shift;
89 if (@_) {
90 $self->{_right_node_id} = shift;
91 }
92 return $self->{_right_node_id};
93 }
94
95
96 =head2 left_node
97
98 Arg [1] : -none-
99 Example : $left_node = $object->left_node();
100 Description : Get the left_node object from the database.
101 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object
102 Exceptions : none
103 Caller : general
104 Status : Stable
105
106 =cut
107
108 sub left_node {
109 my $self = shift;
110 if ($self->{_left_node_id} and $self->adaptor) {
111 return $self->adaptor->fetch_node_by_node_id($self->{_left_node_id});
112 }
113 return undef;
114 }
115
116
117 =head2 right_node
118
119 Arg [1] : -none-
120 Example : $left_node = $object->right_node();
121 Description : Get the right_node object from the database.
122 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object
123 Exceptions : none
124 Caller : general
125 Status : Stable
126
127 =cut
128
129 sub right_node {
130 my $self = shift;
131 if ($self->{_right_node_id} and $self->adaptor) {
132 return $self->adaptor->fetch_node_by_node_id($self->{_right_node_id});
133 }
134 return undef;
135 }
136
137 =head2 ancestral_genomic_align_block_id
138
139 Arg [1] : (optional) $ancestral_genomic_align_block_id
140 Example : $object->ancestral_genomic_align_block_id($ancestral_genomic_align_block_id);
141 Example : $ancestral_genomic_align_block_id = $object->ancestral_genomic_align_block_id();
142 Description : Getter/setter for the ancestral_genomic_align_block_id attribute
143 This attribute is intended for the root of the tree only!
144 Returntype : int
145 Exceptions : none
146 Caller : general
147 Status : Stable
148
149 =cut
150
151 sub ancestral_genomic_align_block_id {
152 my $self = shift;
153 if (@_) {
154 $self->{_ancestral_genomic_align_block_id} = shift;
155 }
156 return $self->{_ancestral_genomic_align_block_id};
157 }
158
159
160 =head2 modern_genomic_align_block_id
161
162 Arg [1] : (optional) $modern_genomic_align_block_id
163 Example : $object->modern_genomic_align_block_id($modern_genomic_align_block_id);
164 Example : $modern_genomic_align_block_id = $object->modern_genomic_align_block_id();
165 Description : Getter/setter for the modern_genomic_align_block_id attribute
166 Returntype :
167 Exceptions : none
168 Caller : general
169 Status : Stable
170
171 =cut
172
173 sub modern_genomic_align_block_id {
174 my $self = shift;
175 if (@_) {
176 $self->{_modern_genomic_align_block_id} = shift;
177 }
178 return $self->{_modern_genomic_align_block_id};
179 }
180
181
182 =head2 genomic_align_group
183
184 Arg [1] : (optional) $genomic_align_group
185 Example : $object->genomic_align_group($genomic_align_group);
186 Example : $genomic_align_group = $object->genomic_align_group();
187 Description : Getter/setter for the genomic_align_group attribute
188 Returntype : Bio::EnsEMBL::Compara::GenomicAlignGroup object
189 Exceptions : none
190 Caller : general
191 Status : Stable
192
193 =cut
194
195 sub genomic_align_group {
196 my $self = shift;
197 if (@_) {
198 $self->{_genomic_align_group} = shift;
199 }
200 return $self->{_genomic_align_group};
201 }
202
203
204 =head2 get_all_genomic_aligns_for_node
205
206 Arg [1] : -none-
207 Example : $genomic_aligns = $object->get_all_genomic_aligns_for_node
208 Description : Getter for all the GenomicAligns contained in the
209 genomic_align_group object on a node. This method is a short
210 cut for $object->genomic_align_group->get_all_GenomicAligns()
211 Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects
212 Exceptions : none
213 Caller : general
214 Status : Stable
215
216 =cut
217
218 sub get_all_genomic_aligns_for_node {
219 my $self = shift(@_);
220 return [] if (!$self->genomic_align_group);
221 return $self->genomic_align_group->get_all_GenomicAligns;
222 }
223
224 =head2 genomic_align_array (DEPRECATED)
225
226 Arg [1] : -none-
227 Example : $genomic_aligns = $object->genomic_align_array
228 Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED
229 Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects
230 Exceptions : none
231 Caller : general
232 Status : Stable
233
234 =cut
235
236 sub genomic_align_array {
237 my $self = shift(@_);
238
239 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead");
240 return($self->get_all_genomic_aligns_for_node);
241
242 }
243
244 =head2 get_all_GenomicAligns (DEPRECATED)
245
246 Arg [1] : -none-
247 Example : $genomic_aligns = $object->get_all_GenomicAligns
248 Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED
249 Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects
250 Exceptions : none
251 Caller : general
252 Status : Stable
253
254 =cut
255
256 sub get_all_GenomicAligns {
257 my $self = shift(@_);
258
259 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead");
260 return($self->get_all_genomic_aligns_for_node);
261
262 }
263
264 =head2 reference_genomic_align
265
266 Arg [1] : (optional) $reference_genomic_align
267 Example : $object->reference_genomic_align($reference_genomic_align);
268 Example : $reference_genomic_align = $object->reference_genomic_align();
269 Description : Getter/setter for the reference_genomic_align attribute
270 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
271 Exceptions : none
272 Caller : general
273 Status : Stable
274
275 =cut
276
277 sub reference_genomic_align {
278 my $self = shift;
279
280 if (@_) {
281 $self->{reference_genomic_align} = shift;
282 }
283
284 return $self->{reference_genomic_align};
285 }
286
287 =head2 reference_genomic_align_id
288
289 Arg [1] : integer $reference_genomic_align_id
290 Example : $genomic_align_block->reference_genomic_align_id(4321);
291 Description: get/set for attribute reference_genomic_align_id. A value of 0 will set the
292 reference_genomic_align_id attribute to undef. When looking for genomic
293 alignments in a given slice or dnafrag, the reference_genomic_align
294 corresponds to the Bio::EnsEMBL::Compara::GenomicAlign included in the
295 starting slice or dnafrag. The reference_genomic_align_id is the dbID
296 corresponding to the reference_genomic_align. All remaining
297 Bio::EnsEMBL::Compara::GenomicAlign objects included in the
298 Bio::EnsEMBL::Compara::GenomicAlignBlock are the
299 non_reference_genomic_aligns.
300 Synchronises reference_genomic_align and reference_genomic_align_id
301 attributes.
302 Returntype : integer
303 Exceptions : throw if $reference_genomic_align_id id not a postive number
304 Caller : $genomic_align_block->reference_genomic_align_id(int)
305 Status : Stable
306
307 =cut
308
309 sub reference_genomic_align_id {
310 my ($self, $reference_genomic_align_id) = @_;
311
312 if (defined($reference_genomic_align_id)) {
313 if ($reference_genomic_align_id !~ /^\d+$/) {
314 throw "[$reference_genomic_align_id] should be a positive number.";
315 }
316 $self->{'reference_genomic_align_id'} = ($reference_genomic_align_id or undef);
317
318 ## Synchronises reference_genomic_align and reference_genomic_align_id
319 if (defined($self->{'reference_genomic_align'}) and
320 defined($self->{'reference_genomic_align'}->dbID) and
321 ($self->{'reference_genomic_align'}->dbID != ($self->{'reference_genomic_align_id'} or 0))) {
322 $self->{'reference_genomic_align'} = undef; ## Attribute will be set on request
323 }
324
325 ## Try to get data from other sources...
326 } elsif (!defined($self->{'reference_genomic_align_id'})) {
327
328 ## ...from the reference_genomic_align attribute
329 if (defined($self->{'reference_genomic_align'}) and
330 defined($self->{'reference_genomic_align'}->dbID)) {
331 $self->{'reference_genomic_align_id'} = $self->{'reference_genomic_align'}->dbID;
332 }
333 }
334
335 return $self->{'reference_genomic_align_id'};
336 }
337
338
339 =head2 reference_genomic_align_node
340
341 Arg [1] : (optional) $reference_genomic_align_node
342 Example : $object->reference_genomic_align_node($reference_genomic_align_node);
343 Example : $reference_genomic_align_node = $object->reference_genomic_align_node();
344 Description : Getter/setter for the reference_genomic_align_node attribute
345 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object
346 Exceptions : none
347 Caller : general
348 Status : Stable
349
350 =cut
351
352 sub reference_genomic_align_node {
353 my $self = shift;
354
355 if (@_) {
356 $self->{reference_genomic_align_node} = shift;
357 }
358
359 return $self->{reference_genomic_align_node};
360 }
361
362
363 =head2 aligned_sequence
364
365 Arg [1] : -none-
366 Example : $aligned_sequence = $object->aligned_sequence();
367 Description : Get the aligned sequence for this node. When the node
368 contains one single sequence, it returns its aligned sequence.
369 For composite segments, it returns the combined aligned seq.
370 Returntype : string
371 Exceptions : none
372 Caller : general
373 Status : Stable
374
375 =cut
376
377 sub aligned_sequence {
378 my $self = shift;
379 return $self->genomic_align_group->aligned_sequence(@_);
380 }
381
382
383 =head2 group_id
384
385 Arg [1] : integer $group_id
386 Example : my $group_id = $genomic_align_tree->group_id;
387 Example : $genomic_align_tree->group_id(1234);
388 Description: get/set for attribute group_id of the underlying
389 GenomicAlignBlock objects
390 Returntype : integer
391 Exceptions : A GenomicAlignTree is made of two GenomicAlignBlock
392 object. The method fail when gettign the value if the
393 two group_ids do not match
394 Caller : general
395
396 =cut
397
398 sub group_id {
399 my ($self, $group_id) = @_;
400
401 if (defined($group_id)) {
402 $self->{'group_id'} = $group_id;
403 # Set the group_id on the genomic_align_blocks...
404 my %genomic_align_blocks;
405 #foreach my $this_genomic_align_node (@{$self->get_all_sorted_genomic_align_nodes()}) {
406 foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) {
407 next if (!defined $this_genomic_align_node->genomic_align_group);
408 foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) {
409 my $this_genomic_align_block = $genomic_align->genomic_align_block;
410 if ($this_genomic_align_block and !defined($genomic_align_blocks{$this_genomic_align_block})) {
411 $this_genomic_align_block->group_id($group_id);
412 $genomic_align_blocks{$this_genomic_align_block} = 1;
413 }
414 }
415 }
416 } elsif (!defined($self->{'group_id'}) and defined($self->{adaptor})) {
417 # Try to get the ID from other sources...
418 my %group_ids;
419 my $genomic_align_block_adaptor = $self->adaptor->dba->get_GenomicAlignBlockAdaptor;
420 foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) {
421 next if (!defined $this_genomic_align_node->genomic_align_group);
422 foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) {
423 my $this_genomic_align_block_id = $genomic_align->genomic_align_block_id;
424 my $this_genomic_align_block = $genomic_align_block_adaptor->fetch_by_dbID($this_genomic_align_block_id);
425 if ($this_genomic_align_block->group_id) {
426 $group_ids{$this_genomic_align_block->group_id} = 1;
427 } else {
428 $group_ids{"undef"} = 1;
429 }
430 }
431 }
432 if (keys %group_ids == 1) {
433 if (!defined($group_ids{"undef"})) {
434 $self->{'group_id'} = (keys %group_ids)[0];
435 }
436 } else {
437 warning("Different group_ids found for this GenomicAlignTree\n");
438 }
439 }
440 return $self->{'group_id'};
441 }
442
443
444 =head2 name
445
446 Arg [1] : (optional) string $name
447 Example : $object->name($name);
448 Example : $name = $object->name();
449 Description : Getter/setter for the name attribute.
450 Returntype : string
451 Exceptions : none
452 Caller : general
453 Status : At risk
454
455 =cut
456
457 sub name {
458 my $self = shift;
459
460 if (@_) {
461 $self->{_name} = shift;
462 } elsif (!$self->{_name}) {
463 my $genomic_align_group = $self->genomic_align_group;
464 if (defined($self->SUPER::name()) and $self->SUPER::name() ne "") {
465 ## Uses the name defined before blessing this object as a
466 ## Bio::EnsEMBL::Compara::GenomicAlignTree in the Ortheus pipeline
467 $self->{_name} = $self->SUPER::name();
468 } elsif ($self->is_leaf) {
469 my $gdb_name;
470 if($genomic_align_group->genome_db->name =~ /(.)[^ ]+_(.{3})/) {
471 $gdb_name = "${1}${2}";
472 }
473 else {
474 $gdb_name = $genomic_align_group->genome_db->name();
475 $gdb_name =~ tr/_//;
476 }
477 $gdb_name = ucfirst($gdb_name);
478 $self->{_name} = $gdb_name.'_'.$genomic_align_group->dnafrag->name."_".
479 $genomic_align_group->dnafrag_start."_".$genomic_align_group->dnafrag_end."[".
480 (($genomic_align_group->dnafrag_strand eq "-1")?"-":"+")."]";
481 } else {
482 $self->{_name} = join("-", map {
483 my $name = $_->genomic_align_group->genome_db->name;
484 if($name =~ /(.)[^ ]+_(.{3})/) {
485 $name = "$1$2";
486 }
487 else {
488 $name =~ tr/_//;
489 }
490 $name = ucfirst($name);
491 $name;
492 } @{$self->get_all_leaves})."[".scalar(@{$self->get_all_leaves})."]";
493 }
494 }
495
496 return $self->{_name};
497 }
498
499
500 =head2 get_all_sorted_genomic_align_nodes
501
502 Arg [1] : (optional) Bio::EnsEMBL::Compara::GenomicAlignTree $reference_genomic_align_node
503 Example : $object->get_all_sorted_genomic_align_nodes($ref_genomic_align_node);
504 Example : $nodes = $object->get_all_sorted_genomic_align_nodes();
505 Description : If ref_genomic_align_node is set, sorts the tree based on the
506 reference_genomic_align_node
507 If ref_genomic_align_node is not set, sorts the tree based on
508 the species name
509 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree
510 Exceptions : none
511 Caller : general
512 Status : At risk
513
514 =cut
515
516 sub get_all_sorted_genomic_align_nodes {
517 my ($self, $reference_genomic_align_node) = @_;
518 my $sorted_genomic_align_nodes = [];
519
520 if (!$reference_genomic_align_node and $self->reference_genomic_align_node) {
521 $reference_genomic_align_node = $self->reference_genomic_align_node;
522 }
523
524 my $children = $self->children;
525 if (@$children >= 1) {
526 $children = [sort _sort_children @$children];
527 push(@$sorted_genomic_align_nodes, @{$children->[0]->get_all_sorted_genomic_align_nodes(
528 $reference_genomic_align_node)});
529 push(@$sorted_genomic_align_nodes, $self);
530 for (my $i = 1; $i < @$children; $i++) {
531 push(@$sorted_genomic_align_nodes, @{$children->[$i]->get_all_sorted_genomic_align_nodes(
532 $reference_genomic_align_node)});
533 }
534 } elsif (@$children == 0) {
535 push(@$sorted_genomic_align_nodes, $self);
536 }
537
538 return $sorted_genomic_align_nodes;
539 }
540
541
542 =head2 restrict_between_alignment_positions
543
544 Arg[1] : [optional] int $start, refers to the start of the alignment
545 Arg[2] : [optional] int $end, refers to the start of the alignment
546 Arg[3] : [optional] boolean $skip_empty_GenomicAligns
547 Example : none
548 Description: restrict this GenomicAlignBlock. It returns a new object unless no
549 restriction is needed. In that case, it returns the original unchanged
550 object.
551 This method uses coordinates relative to the alignment itself.
552 For instance if you have an alignment like:
553 1 1 2 2 3
554 1 5 0 5 0 5 0
555 AAC--CTTGTGGTA-CTACTT-----ACTTT
556 AACCCCTT-TGGTATCTACTTACCTAACTTT
557 and you restrict it between 5 and 25, you will get back a
558 object containing the following alignment:
559 1 1
560 1 5 0 5
561 CTTGTGGTA-CTACTT----
562 CTT-TGGTATCTACTTACCT
563
564 See restrict_between_reference_positions() elsewhere in this document
565 for an alternative method using absolute genomic coordinates.
566
567 NB: This method works only for GenomicAlignBlock which have been
568 fetched from the DB as it is adjusting the dnafrag coordinates
569 and the cigar_line only and not the actual sequences stored in the
570 object if any. If you want to restrict an object with no coordinates
571 a simple substr() will do!
572
573 Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object
574 Exceptions : none
575 Caller : general
576 Status : At risk
577
578 =cut
579
580 sub restrict_between_alignment_positions {
581 my ($self, $start, $end, $skip_empty_GenomicAligns, $reference_genomic_align) = @_;
582 my $genomic_align_tree;
583 $genomic_align_tree = $self->copy();
584 $genomic_align_tree->adaptor($self->adaptor);
585 foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) {
586 $this_node->adaptor($self->adaptor);
587 }
588 my $final_alignment_length = $end - $start + 1;
589
590 #Get all the nodes and restrict but only remove leaves if necessary. Call minimize_tree at the end to
591 #remove the internal nodes
592 foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) {
593 my $genomic_align_group = $this_node->genomic_align_group;
594 next if (!$genomic_align_group);
595 my $new_genomic_aligns = [];
596 my $length = $this_node->length;
597 foreach my $this_genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) {
598 my $restricted_genomic_align = $this_genomic_align->restrict($start, $end, $length);
599
600 if ($genomic_align_tree->reference_genomic_align eq $this_genomic_align) {
601 ## Update the reference_genomic_align
602
603 $genomic_align_tree->reference_genomic_align($restricted_genomic_align);
604 $genomic_align_tree->reference_genomic_align_node($this_node);
605 }
606 if (!$skip_empty_GenomicAligns or
607 $restricted_genomic_align->dnafrag_start <= $restricted_genomic_align->dnafrag_end
608 ) {
609 ## Always skip composite segments outside of the range of restriction
610 ## The cigar_line will contain only X's
611 next if ($restricted_genomic_align->cigar_line =~ /^\d*X$/);
612
613 #Set the genomic_align_block_id of the restricted genomic_align
614 $restricted_genomic_align->genomic_align_block_id($this_genomic_align->genomic_align_block_id);
615 #$restricted_genomic_align->genomic_align_block($genomic_align_tree);
616
617 push(@$new_genomic_aligns, $restricted_genomic_align);
618 }
619 }
620 if (@$new_genomic_aligns) {
621 $genomic_align_group->{genomic_align_array} = undef;
622 foreach my $this_genomic_align (@$new_genomic_aligns) {
623 $genomic_align_group->add_GenomicAlign($this_genomic_align);
624 }
625 } else {
626 #Only remove leaves. Use minimise_tree to tidy up the internal nodes
627 if ($this_node->is_leaf) {
628 $this_node->disavow_parent();
629 my $reference_genomic_align = $genomic_align_tree->reference_genomic_align;
630 if ($reference_genomic_align) {
631 my $reference_genomic_align_node = $genomic_align_tree->reference_genomic_align_node;
632 $genomic_align_tree = $genomic_align_tree->minimize_tree();
633 ## Make sure links are not broken after tree minimization
634 $genomic_align_tree->reference_genomic_align($reference_genomic_align);
635
636 #Set the genomic_align_block_id of the restricted genomic_align
637 $genomic_align_tree->reference_genomic_align->genomic_align_block_id($reference_genomic_align->genomic_align_block_id);
638
639 #$genomic_align_tree->reference_genomic_align->genomic_align_block($genomic_align_tree);
640 $genomic_align_tree->reference_genomic_align_node($reference_genomic_align_node);
641 }
642 }
643 }
644 }
645 $genomic_align_tree = $genomic_align_tree->minimize_tree();
646 $genomic_align_tree->length($final_alignment_length);
647
648 return $genomic_align_tree;
649 }
650
651
652 =head2 restrict_between_reference_positions
653
654 Arg[1] : [optional] int $start, refers to the reference_dnafrag
655 Arg[2] : [optional] int $end, refers to the reference_dnafrag
656 Arg[3] : [optional] Bio::EnsEMBL::Compara::GenomicAlign $reference_GenomicAlign
657 Arg[4] : [optional] boolean $skip_empty_GenomicAligns [ALWAYS FALSE]
658 Example : none
659 Description: restrict this GenomicAlignBlock. It returns a new object unless no
660 restriction is needed. In that case, it returns the original unchanged
661 object
662 It might be the case that the restricted region coincide with a gap
663 in one or several GenomicAligns. By default these GenomicAligns are
664 returned with a dnafrag_end equals to its dnafrag_start + 1. For instance,
665 a GenomicAlign with dnafrag_start = 12345 and dnafrag_end = 12344
666 correspond to a block which goes on this region from before 12345 to
667 after 12344, ie just between 12344 and 12345. You can choose to remove
668 these empty GenomicAligns by setting $skip_empty_GenomicAligns to any
669 true value.
670 Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object in scalar context. In
671 list context, returns the previous object and the start and end
672 positions of the restriction in alignment coordinates (from 1 to
673 alignment_length)
674 Exceptions : return undef if reference positions lie outside of the alignment
675 Caller : general
676 Status : At risk
677
678 =cut
679
680 =comment
681
682 sub restrict_between_reference_positions {
683 my ($self, $start, $end, $reference_genomic_align, $skip_empty_GenomicAligns) = @_;
684 my $genomic_align_tree;
685
686 $reference_genomic_align ||= $self->reference_genomic_align;
687 throw("A reference Bio::EnsEMBL::Compara::GenomicAlignTree must be given")
688 if (!$reference_genomic_align);
689
690 my @restricted_genomic_align_tree_params = $self->SUPER::restrict_between_reference_positions($start, $end, $reference_genomic_align, $skip_empty_GenomicAligns);
691 my $restricted_genomic_align_tree = $restricted_genomic_align_tree_params[0];
692
693 #return $self if (!$restricted_genomic_align_tree or $restricted_genomic_align_tree eq $self);
694
695 return wantarray ? @restricted_genomic_align_tree_params : $restricted_genomic_align_tree;
696 }
697
698 =cut
699
700 =head2 copy
701
702 Arg : none
703 Example : my $new_tree = $this_tree->copy()
704 Description : Create a copy of this Bio::EnsEMBL::Compara::GenomicAlignTree
705 object
706 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree
707 Exceptions : none
708 Caller : general
709 Status : At risk
710
711 =cut
712
713 sub copy {
714 my $self = shift(@_);
715 my $new_copy;
716
717 $new_copy = $self->SUPER::copy();
718
719 $new_copy->genomic_align_group($self->genomic_align_group->copy) if ($self->genomic_align_group);
720
721 if ($self->reference_genomic_align_node) {
722 my $ref_ga = $self->reference_genomic_align;
723 #Need to find the reference_genomic_align in the genomic_align_group
724 foreach my $leaf (@{$new_copy->get_all_leaves}) {
725 foreach my $gag ($leaf->genomic_align_group) {
726 foreach my $ga (@{$gag->genomic_align_array}) {
727 if ($ref_ga->dnafrag_id == $ga->dnafrag_id &&
728 $ref_ga->dnafrag_start == $ga->dnafrag_start &&
729 $ref_ga->dnafrag_end == $ga->dnafrag_end &&
730 $ref_ga->dnafrag_strand == $ga->dnafrag_strand) {
731 $new_copy->reference_genomic_align($ga);
732 last;
733 }
734 }
735 }
736 }
737 }
738
739
740 $new_copy->reference_genomic_align_node($self->reference_genomic_align_node->copy) if ($self->reference_genomic_align_node);
741
742 #These are not deep copies
743 #$new_copy->reference_genomic_align($self->reference_genomic_align) if ($self->reference_genomic_align);
744 #$new_copy->reference_genomic_align_node($self->reference_genomic_align_node) if ($self->reference_genomic_align_node);
745
746 #There are lots of bits missing from this copy
747 #Still to add?
748 #parent_link
749 #obj_id_to_link
750
751 $new_copy->{_original_strand} = $self->{_original_strand} if (defined $self->{_original_strand});
752 $new_copy->{_parent_id} = $self->{_parent_id} if (defined $self->{_parent_id});
753 $new_copy->{_root_id} = $self->{_root_id} if (defined $self->{_root_id});
754 $new_copy->{_left_node_id} = $self->{_left_node_id} if (defined $self->{_left_node_id});
755 $new_copy->{_right_node_id} = $self->{_right_node_id} if (defined $self->{_right_node_id});
756 $new_copy->{_node_id} = $self->{_node_id} if (defined $self->{_node_id});
757 $new_copy->{_reference_slice} = $self->{_reference_slice} if (defined $self->{_reference_slice});
758 $new_copy->{_reference_slice_start} = $self->{_reference_slice_start} if (defined $self->{_reference_slice_start});
759 $new_copy->{_reference_slice_end} = $self->{_reference_slice_end} if (defined $self->{_reference_slice_end});
760 $new_copy->{_reference_slice_strand} = $self->{_reference_slice_strand} if (defined $self->{_reference_slice_strand});
761
762 return $new_copy;
763 }
764
765 =head2 print
766
767 Arg : none
768 Example : print()
769 Description : Print the fields in a Bio::EnsEMBL::Compara::GenomicAlignTree
770 Returntype : none
771 Exceptions : none
772 Caller : general
773 Status : At risk
774
775 =cut
776
777 sub print {
778 my $self = shift(@_);
779 my $level = shift;
780 my $reference_genomic_align = shift;
781 if (!$level) {
782 print STDERR $self->newick_format(), "\n";
783 $reference_genomic_align = ($self->reference_genomic_align or "");
784 }
785 $level++;
786 my $mark = "- ";
787 if (grep {$_ eq $reference_genomic_align} @{$self->get_all_genomic_aligns_for_node}) {
788 $mark = "* ";
789 }
790 print STDERR " " x $level, $mark,
791 "[", $self->node_id, "/", ($self->get_original_strand?"+":"-"), "] ",
792 $self->genomic_align_group->genome_db->name,":",
793 $self->genomic_align_group->dnafrag->name,":",
794 $self->genomic_align_group->dnafrag_start,":",
795 $self->genomic_align_group->dnafrag_end,":",
796 $self->genomic_align_group->dnafrag_strand,":",
797 " (", ($self->left_node_id?$self->left_node->node_id."/".$self->left_node->root->node_id:"...."),
798 " - ", ($self->right_node_id?$self->right_node->node_id."/".$self->right_node->root->node_id:"...."),")\n";
799 foreach my $this_genomic_align (@{$self->get_all_genomic_aligns_for_node}) {
800 if ($this_genomic_align eq $reference_genomic_align) {
801 print " " x 8, "* ", $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n";
802 } else {
803 print " " x 10, $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n";
804 }
805 }
806 foreach my $node (sort _sort_children @{$self->children}) {
807 $node->print($level, $reference_genomic_align);
808 }
809 $level--;
810 }
811
812
813 =head2 get_all_nodes_from_leaves_to_this
814
815 Arg[1] : Bio::EnsEMBL::Compara::GenomicAlignTree $all_nodes
816 Example : my $all_nodes = get_all_nodes_from_leaves_to_this()
817 Description :
818 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object
819 Exceptions : none
820 Caller : general
821 Status : At risk
822
823 =cut
824
825 sub get_all_nodes_from_leaves_to_this {
826 my $self = shift(@_);
827 my $all_nodes = (shift or []);
828 foreach my $node (sort _sort_children @{$self->children}) {
829 $all_nodes = $node->get_all_nodes_from_leaves_to_this($all_nodes);
830 }
831 push(@$all_nodes, $self);
832 return $all_nodes;
833 }
834
835
836 =head2 get_all_leaves
837
838 Title : get_all_leaves
839 Usage : my @leaves = @{$tree->get_all_leaves};
840 Function: searching from the given starting node, searches and creates list
841 of all leaves in this subtree and returns by reference.
842 This method overwrites the parent method because it sorts
843 the leaves according to their node_id. Here, we use this method
844 to get all leaves in another sorting function. Not only it doesn't
845 make much sense to sort something that will be sorted again, but
846 it can also produce some Perl errors as sort methods uses $a and
847 $b which are package global variables.
848 Example :
849 Returns : reference to list of NestedSet objects (all leaves)
850 Args : none
851 Status : At risk
852
853 =cut
854
855 sub get_all_leaves {
856 my $self = shift;
857
858 my $leaves = {};
859 $self->_recursive_get_all_leaves($leaves);
860 my @leaf_list = values(%{$leaves});
861
862 return \@leaf_list;
863 }
864
865
866 =head2 _sort_children
867
868 Arg : none
869 Example : sort _sort_children @$children
870 Description : sort function for sorting the nodes of a Bio::EnsEMBL::Compara::GenomicAlignTree object
871 Returntype : int (-1,0,1)
872 Exceptions : none
873 Caller : general
874 Status : At risk
875
876 =cut
877
878 sub _sort_children {
879 my $reference_genomic_align_node;
880
881 if (defined ($a->root) && defined($b->root) && $a->root eq $b->root and $a->root->reference_genomic_align_node) {
882 $reference_genomic_align_node = $a->root->reference_genomic_align_node;
883 }
884
885 ## Reference GenomicAlign based sorting
886 if ($reference_genomic_align_node) {
887 if (grep {$_ eq $reference_genomic_align_node} @{$a->get_all_leaves}) {
888 return -1;
889 } elsif (grep {$_ eq $reference_genomic_align_node} @{$b->get_all_leaves}) {
890 return 1;
891 }
892 }
893
894 ## Species name based sorting
895 my $species_a = $a->_name_for_sorting;
896 my $species_b = $b->_name_for_sorting;
897
898 return $species_a cmp $species_b;
899 }
900
901 =head2 _name_for_sorting
902
903 Arg : none
904 Example : my $species_a = $a->_name_for_sorting;
905 Description : if the node is a leaf, create a name based on the species
906 name, dnafrag name, group_id and the start position. If the
907 node is an internal node, create a name based on the species
908 name, dnafrag name and the start position
909 Returntype : string
910 Exceptions : none
911 Caller : _sort_children
912 Status : At risk
913
914 =cut
915
916 sub _name_for_sorting {
917 my ($self) = @_;
918 my $name;
919
920 if ($self->is_leaf) {
921 $name = sprintf("%s.%s.%s.%020d",
922 $self->genomic_align_group->genome_db->name,
923 $self->genomic_align_group->dnafrag->name,
924 ($self->genomic_align_group->dbID or
925 $self->genomic_align_group->{original_dbID} or 0),
926 $self->genomic_align_group->dnafrag_start);
927 } else {
928 $name = join(" - ", sort map {sprintf("%s.%s.%020d",
929 $_->genomic_align_group->genome_db->name,
930 $_->genomic_align_group->dnafrag->name,
931 $_->genomic_align_group->dnafrag_start)} @{$self->get_all_leaves});
932 }
933
934 return $name;
935 }
936
937 =head2 reverse_complement
938
939 Args : none
940 Example : none
941 Description: reverse complement the tree,
942 modifying dnafrag_strand and cigar_line of each GenomicAlign in consequence
943 Returntype : none
944 Exceptions : none
945 Caller : general
946 Status : At risk
947
948 =cut
949
950 sub reverse_complement {
951 my ($self) = @_;
952
953 if (defined($self->{_original_strand})) {
954 $self->{_original_strand} = 1 - $self->{_original_strand};
955 } else {
956 $self->{_original_strand} = 0;
957 }
958
959 foreach my $this_node (@{$self->get_all_nodes}) {
960 my $genomic_align_group = $this_node->genomic_align_group;
961 next if (!$genomic_align_group);
962 my $gas = $genomic_align_group->get_all_GenomicAligns;
963 foreach my $ga (@{$gas}) {
964 $ga->reverse_complement;
965 }
966 }
967 }
968
969 sub length {
970 my ($self, $length) = @_;
971
972 if (defined($length)) {
973 $self->{'length'} = $length;
974 } elsif (!defined($self->{'length'})) {
975 # Try to get the ID from other sources...
976 if (defined($self->{'adaptor'}) and defined($self->dbID)) {
977 # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object
978 $self->adaptor->retrieve_all_direct_attributes($self);
979 } elsif (@{$self->get_all_genomic_aligns_for_node} and $self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ")) {
980 $self->{'length'} = CORE::length($self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ"));
981 } else {
982 foreach my $this_node (@{$self->get_all_nodes}) {
983 my $genomic_align_group = $this_node->genomic_align_group;
984 next if (!$genomic_align_group);
985 $self->{'length'} = CORE::length($genomic_align_group->get_all_GenomicAligns->[0]->aligned_sequence("+FAKE_SEQ"));
986 last;
987 }
988 }
989 }
990 return $self->{'length'};
991 }
992
993 =head2 alignment_strings
994
995 Arg [1] : none
996 Example : $genomic_align_tree->alignment_strings
997 Description: Returns the alignment string of all the sequences in the
998 alignment
999 Returntype : array reference containing several strings
1000 Exceptions : none
1001 Caller : general
1002 Status : Stable
1003
1004 =cut
1005
1006 sub alignment_strings {
1007 my ($self) = @_;
1008 my $alignment_strings = [];
1009
1010 foreach my $this_node (@{$self->get_all_nodes}) {
1011 my $genomic_align_group = $this_node->genomic_align_group;
1012 next if (!$genomic_align_group);
1013 foreach my $genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) {
1014 push(@$alignment_strings, $genomic_align->aligned_sequence);
1015 }
1016 }
1017
1018 return $alignment_strings;
1019 }
1020
1021 =head2 method_link_species_set
1022
1023 Arg [1] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
1024 Example : $method_link_species_set = $genomic_align_tree->method_link_species_set;
1025 Description: Getter for attribute method_link_species_set. Takes this from the first Bio::EnsEMBL::Compara::GenomicAlign
1026 object
1027 Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
1028 Exceptions : thrown if $method_link_species_set is not a
1029 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
1030 Caller : general
1031 Status : Stable
1032
1033 =cut
1034
1035 sub method_link_species_set {
1036 my ($self) = @_;
1037
1038 my $method_link_species_set = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set;
1039
1040 throw("$method_link_species_set is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
1041 unless ($method_link_species_set->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
1042
1043 return $method_link_species_set;
1044 }
1045
1046 =head2 method_link_species_set_id
1047
1048 Arg [1] : integer $method_link_species_set_id
1049 Example : $method_link_species_set_id = $genomic_align_tree->method_link_species_set_id;
1050 Description: Getter for the attribute method_link_species_set_id. Takes this from the first
1051 Bio::EnsEMBL::Compara::GenomicAlign object
1052 Returntype : integer
1053 Caller : object::methodname
1054 Status : Stable
1055
1056 =cut
1057
1058 sub method_link_species_set_id {
1059 my ($self) = @_;
1060
1061 my $method_link_species_set_id = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set->dbID;
1062
1063 return $method_link_species_set_id;
1064 }
1065
1066 #sub DESTROY {
1067 # my ($self) = @_;
1068 # $self->release_tree unless ($self->{_parent_link});
1069 # }
1070
1071 1;