Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Compara/GenomicAlignTree.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =head1 NAME | |
20 | |
21 Bio::EnsEMBL::Compara::GenomicAlignTree | |
22 | |
23 =head1 SYNOPSIS | |
24 | |
25 =head1 DESCRIPTION | |
26 | |
27 Specific subclass of NestedSet to add functionality when the nodes of this tree | |
28 are GenomicAlign objects and the tree is a representation of a Protein derived | |
29 Phylogenetic tree | |
30 | |
31 =head1 APPENDIX | |
32 | |
33 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
34 | |
35 =cut | |
36 | |
37 | |
38 package Bio::EnsEMBL::Compara::GenomicAlignTree; | |
39 | |
40 use strict; | |
41 use Bio::EnsEMBL::Utils::Argument; | |
42 use Bio::EnsEMBL::Utils::Exception; | |
43 use Bio::SimpleAlign; | |
44 use IO::File; | |
45 | |
46 use Bio::EnsEMBL::Compara::NestedSet; | |
47 use Bio::EnsEMBL::Compara::BaseGenomicAlignSet; | |
48 | |
49 our @ISA = qw(Bio::EnsEMBL::Compara::NestedSet Bio::EnsEMBL::Compara::BaseGenomicAlignSet); | |
50 | |
51 | |
52 =head2 left_node_id | |
53 | |
54 Arg [1] : (optional) $left_node_id | |
55 Example : $object->left_node_id($left_node_id); | |
56 Example : $left_node_id = $object->left_node_id(); | |
57 Description : Getter/setter for the left_node_id attribute | |
58 Returntype : integer | |
59 Exceptions : none | |
60 Caller : general | |
61 Status : Stable | |
62 | |
63 =cut | |
64 | |
65 sub left_node_id { | |
66 my $self = shift; | |
67 if (@_) { | |
68 $self->{_left_node_id} = shift; | |
69 } | |
70 return $self->{_left_node_id}; | |
71 } | |
72 | |
73 | |
74 =head2 right_node_id | |
75 | |
76 Arg [1] : (optional) $right_node_id | |
77 Example : $object->right_node_id($right_node_id); | |
78 Example : $right_node_id = $object->right_node_id(); | |
79 Description : Getter/setter for the right_node_id attribute | |
80 Returntype : integer | |
81 Exceptions : none | |
82 Caller : general | |
83 Status : Stable | |
84 | |
85 =cut | |
86 | |
87 sub right_node_id { | |
88 my $self = shift; | |
89 if (@_) { | |
90 $self->{_right_node_id} = shift; | |
91 } | |
92 return $self->{_right_node_id}; | |
93 } | |
94 | |
95 | |
96 =head2 left_node | |
97 | |
98 Arg [1] : -none- | |
99 Example : $left_node = $object->left_node(); | |
100 Description : Get the left_node object from the database. | |
101 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object | |
102 Exceptions : none | |
103 Caller : general | |
104 Status : Stable | |
105 | |
106 =cut | |
107 | |
108 sub left_node { | |
109 my $self = shift; | |
110 if ($self->{_left_node_id} and $self->adaptor) { | |
111 return $self->adaptor->fetch_node_by_node_id($self->{_left_node_id}); | |
112 } | |
113 return undef; | |
114 } | |
115 | |
116 | |
117 =head2 right_node | |
118 | |
119 Arg [1] : -none- | |
120 Example : $left_node = $object->right_node(); | |
121 Description : Get the right_node object from the database. | |
122 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object | |
123 Exceptions : none | |
124 Caller : general | |
125 Status : Stable | |
126 | |
127 =cut | |
128 | |
129 sub right_node { | |
130 my $self = shift; | |
131 if ($self->{_right_node_id} and $self->adaptor) { | |
132 return $self->adaptor->fetch_node_by_node_id($self->{_right_node_id}); | |
133 } | |
134 return undef; | |
135 } | |
136 | |
137 =head2 ancestral_genomic_align_block_id | |
138 | |
139 Arg [1] : (optional) $ancestral_genomic_align_block_id | |
140 Example : $object->ancestral_genomic_align_block_id($ancestral_genomic_align_block_id); | |
141 Example : $ancestral_genomic_align_block_id = $object->ancestral_genomic_align_block_id(); | |
142 Description : Getter/setter for the ancestral_genomic_align_block_id attribute | |
143 This attribute is intended for the root of the tree only! | |
144 Returntype : int | |
145 Exceptions : none | |
146 Caller : general | |
147 Status : Stable | |
148 | |
149 =cut | |
150 | |
151 sub ancestral_genomic_align_block_id { | |
152 my $self = shift; | |
153 if (@_) { | |
154 $self->{_ancestral_genomic_align_block_id} = shift; | |
155 } | |
156 return $self->{_ancestral_genomic_align_block_id}; | |
157 } | |
158 | |
159 | |
160 =head2 modern_genomic_align_block_id | |
161 | |
162 Arg [1] : (optional) $modern_genomic_align_block_id | |
163 Example : $object->modern_genomic_align_block_id($modern_genomic_align_block_id); | |
164 Example : $modern_genomic_align_block_id = $object->modern_genomic_align_block_id(); | |
165 Description : Getter/setter for the modern_genomic_align_block_id attribute | |
166 Returntype : | |
167 Exceptions : none | |
168 Caller : general | |
169 Status : Stable | |
170 | |
171 =cut | |
172 | |
173 sub modern_genomic_align_block_id { | |
174 my $self = shift; | |
175 if (@_) { | |
176 $self->{_modern_genomic_align_block_id} = shift; | |
177 } | |
178 return $self->{_modern_genomic_align_block_id}; | |
179 } | |
180 | |
181 | |
182 =head2 genomic_align_group | |
183 | |
184 Arg [1] : (optional) $genomic_align_group | |
185 Example : $object->genomic_align_group($genomic_align_group); | |
186 Example : $genomic_align_group = $object->genomic_align_group(); | |
187 Description : Getter/setter for the genomic_align_group attribute | |
188 Returntype : Bio::EnsEMBL::Compara::GenomicAlignGroup object | |
189 Exceptions : none | |
190 Caller : general | |
191 Status : Stable | |
192 | |
193 =cut | |
194 | |
195 sub genomic_align_group { | |
196 my $self = shift; | |
197 if (@_) { | |
198 $self->{_genomic_align_group} = shift; | |
199 } | |
200 return $self->{_genomic_align_group}; | |
201 } | |
202 | |
203 | |
204 =head2 get_all_genomic_aligns_for_node | |
205 | |
206 Arg [1] : -none- | |
207 Example : $genomic_aligns = $object->get_all_genomic_aligns_for_node | |
208 Description : Getter for all the GenomicAligns contained in the | |
209 genomic_align_group object on a node. This method is a short | |
210 cut for $object->genomic_align_group->get_all_GenomicAligns() | |
211 Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects | |
212 Exceptions : none | |
213 Caller : general | |
214 Status : Stable | |
215 | |
216 =cut | |
217 | |
218 sub get_all_genomic_aligns_for_node { | |
219 my $self = shift(@_); | |
220 return [] if (!$self->genomic_align_group); | |
221 return $self->genomic_align_group->get_all_GenomicAligns; | |
222 } | |
223 | |
224 =head2 genomic_align_array (DEPRECATED) | |
225 | |
226 Arg [1] : -none- | |
227 Example : $genomic_aligns = $object->genomic_align_array | |
228 Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED | |
229 Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects | |
230 Exceptions : none | |
231 Caller : general | |
232 Status : Stable | |
233 | |
234 =cut | |
235 | |
236 sub genomic_align_array { | |
237 my $self = shift(@_); | |
238 | |
239 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead"); | |
240 return($self->get_all_genomic_aligns_for_node); | |
241 | |
242 } | |
243 | |
244 =head2 get_all_GenomicAligns (DEPRECATED) | |
245 | |
246 Arg [1] : -none- | |
247 Example : $genomic_aligns = $object->get_all_GenomicAligns | |
248 Description : Alias for get_all_genomic_aligns_for_node. TO BE DEPRECATED | |
249 Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlign objects | |
250 Exceptions : none | |
251 Caller : general | |
252 Status : Stable | |
253 | |
254 =cut | |
255 | |
256 sub get_all_GenomicAligns { | |
257 my $self = shift(@_); | |
258 | |
259 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignTree->get_all_genomic_aligns_for_node() method instead"); | |
260 return($self->get_all_genomic_aligns_for_node); | |
261 | |
262 } | |
263 | |
264 =head2 reference_genomic_align | |
265 | |
266 Arg [1] : (optional) $reference_genomic_align | |
267 Example : $object->reference_genomic_align($reference_genomic_align); | |
268 Example : $reference_genomic_align = $object->reference_genomic_align(); | |
269 Description : Getter/setter for the reference_genomic_align attribute | |
270 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object | |
271 Exceptions : none | |
272 Caller : general | |
273 Status : Stable | |
274 | |
275 =cut | |
276 | |
277 sub reference_genomic_align { | |
278 my $self = shift; | |
279 | |
280 if (@_) { | |
281 $self->{reference_genomic_align} = shift; | |
282 } | |
283 | |
284 return $self->{reference_genomic_align}; | |
285 } | |
286 | |
287 =head2 reference_genomic_align_id | |
288 | |
289 Arg [1] : integer $reference_genomic_align_id | |
290 Example : $genomic_align_block->reference_genomic_align_id(4321); | |
291 Description: get/set for attribute reference_genomic_align_id. A value of 0 will set the | |
292 reference_genomic_align_id attribute to undef. When looking for genomic | |
293 alignments in a given slice or dnafrag, the reference_genomic_align | |
294 corresponds to the Bio::EnsEMBL::Compara::GenomicAlign included in the | |
295 starting slice or dnafrag. The reference_genomic_align_id is the dbID | |
296 corresponding to the reference_genomic_align. All remaining | |
297 Bio::EnsEMBL::Compara::GenomicAlign objects included in the | |
298 Bio::EnsEMBL::Compara::GenomicAlignBlock are the | |
299 non_reference_genomic_aligns. | |
300 Synchronises reference_genomic_align and reference_genomic_align_id | |
301 attributes. | |
302 Returntype : integer | |
303 Exceptions : throw if $reference_genomic_align_id id not a postive number | |
304 Caller : $genomic_align_block->reference_genomic_align_id(int) | |
305 Status : Stable | |
306 | |
307 =cut | |
308 | |
309 sub reference_genomic_align_id { | |
310 my ($self, $reference_genomic_align_id) = @_; | |
311 | |
312 if (defined($reference_genomic_align_id)) { | |
313 if ($reference_genomic_align_id !~ /^\d+$/) { | |
314 throw "[$reference_genomic_align_id] should be a positive number."; | |
315 } | |
316 $self->{'reference_genomic_align_id'} = ($reference_genomic_align_id or undef); | |
317 | |
318 ## Synchronises reference_genomic_align and reference_genomic_align_id | |
319 if (defined($self->{'reference_genomic_align'}) and | |
320 defined($self->{'reference_genomic_align'}->dbID) and | |
321 ($self->{'reference_genomic_align'}->dbID != ($self->{'reference_genomic_align_id'} or 0))) { | |
322 $self->{'reference_genomic_align'} = undef; ## Attribute will be set on request | |
323 } | |
324 | |
325 ## Try to get data from other sources... | |
326 } elsif (!defined($self->{'reference_genomic_align_id'})) { | |
327 | |
328 ## ...from the reference_genomic_align attribute | |
329 if (defined($self->{'reference_genomic_align'}) and | |
330 defined($self->{'reference_genomic_align'}->dbID)) { | |
331 $self->{'reference_genomic_align_id'} = $self->{'reference_genomic_align'}->dbID; | |
332 } | |
333 } | |
334 | |
335 return $self->{'reference_genomic_align_id'}; | |
336 } | |
337 | |
338 | |
339 =head2 reference_genomic_align_node | |
340 | |
341 Arg [1] : (optional) $reference_genomic_align_node | |
342 Example : $object->reference_genomic_align_node($reference_genomic_align_node); | |
343 Example : $reference_genomic_align_node = $object->reference_genomic_align_node(); | |
344 Description : Getter/setter for the reference_genomic_align_node attribute | |
345 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object | |
346 Exceptions : none | |
347 Caller : general | |
348 Status : Stable | |
349 | |
350 =cut | |
351 | |
352 sub reference_genomic_align_node { | |
353 my $self = shift; | |
354 | |
355 if (@_) { | |
356 $self->{reference_genomic_align_node} = shift; | |
357 } | |
358 | |
359 return $self->{reference_genomic_align_node}; | |
360 } | |
361 | |
362 | |
363 =head2 aligned_sequence | |
364 | |
365 Arg [1] : -none- | |
366 Example : $aligned_sequence = $object->aligned_sequence(); | |
367 Description : Get the aligned sequence for this node. When the node | |
368 contains one single sequence, it returns its aligned sequence. | |
369 For composite segments, it returns the combined aligned seq. | |
370 Returntype : string | |
371 Exceptions : none | |
372 Caller : general | |
373 Status : Stable | |
374 | |
375 =cut | |
376 | |
377 sub aligned_sequence { | |
378 my $self = shift; | |
379 return $self->genomic_align_group->aligned_sequence(@_); | |
380 } | |
381 | |
382 | |
383 =head2 group_id | |
384 | |
385 Arg [1] : integer $group_id | |
386 Example : my $group_id = $genomic_align_tree->group_id; | |
387 Example : $genomic_align_tree->group_id(1234); | |
388 Description: get/set for attribute group_id of the underlying | |
389 GenomicAlignBlock objects | |
390 Returntype : integer | |
391 Exceptions : A GenomicAlignTree is made of two GenomicAlignBlock | |
392 object. The method fail when gettign the value if the | |
393 two group_ids do not match | |
394 Caller : general | |
395 | |
396 =cut | |
397 | |
398 sub group_id { | |
399 my ($self, $group_id) = @_; | |
400 | |
401 if (defined($group_id)) { | |
402 $self->{'group_id'} = $group_id; | |
403 # Set the group_id on the genomic_align_blocks... | |
404 my %genomic_align_blocks; | |
405 #foreach my $this_genomic_align_node (@{$self->get_all_sorted_genomic_align_nodes()}) { | |
406 foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) { | |
407 next if (!defined $this_genomic_align_node->genomic_align_group); | |
408 foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) { | |
409 my $this_genomic_align_block = $genomic_align->genomic_align_block; | |
410 if ($this_genomic_align_block and !defined($genomic_align_blocks{$this_genomic_align_block})) { | |
411 $this_genomic_align_block->group_id($group_id); | |
412 $genomic_align_blocks{$this_genomic_align_block} = 1; | |
413 } | |
414 } | |
415 } | |
416 } elsif (!defined($self->{'group_id'}) and defined($self->{adaptor})) { | |
417 # Try to get the ID from other sources... | |
418 my %group_ids; | |
419 my $genomic_align_block_adaptor = $self->adaptor->dba->get_GenomicAlignBlockAdaptor; | |
420 foreach my $this_genomic_align_node (@{$self->get_all_nodes()}) { | |
421 next if (!defined $this_genomic_align_node->genomic_align_group); | |
422 foreach my $genomic_align (@{$this_genomic_align_node->genomic_align_group->get_all_GenomicAligns}) { | |
423 my $this_genomic_align_block_id = $genomic_align->genomic_align_block_id; | |
424 my $this_genomic_align_block = $genomic_align_block_adaptor->fetch_by_dbID($this_genomic_align_block_id); | |
425 if ($this_genomic_align_block->group_id) { | |
426 $group_ids{$this_genomic_align_block->group_id} = 1; | |
427 } else { | |
428 $group_ids{"undef"} = 1; | |
429 } | |
430 } | |
431 } | |
432 if (keys %group_ids == 1) { | |
433 if (!defined($group_ids{"undef"})) { | |
434 $self->{'group_id'} = (keys %group_ids)[0]; | |
435 } | |
436 } else { | |
437 warning("Different group_ids found for this GenomicAlignTree\n"); | |
438 } | |
439 } | |
440 return $self->{'group_id'}; | |
441 } | |
442 | |
443 | |
444 =head2 name | |
445 | |
446 Arg [1] : (optional) string $name | |
447 Example : $object->name($name); | |
448 Example : $name = $object->name(); | |
449 Description : Getter/setter for the name attribute. | |
450 Returntype : string | |
451 Exceptions : none | |
452 Caller : general | |
453 Status : At risk | |
454 | |
455 =cut | |
456 | |
457 sub name { | |
458 my $self = shift; | |
459 | |
460 if (@_) { | |
461 $self->{_name} = shift; | |
462 } elsif (!$self->{_name}) { | |
463 my $genomic_align_group = $self->genomic_align_group; | |
464 if (defined($self->SUPER::name()) and $self->SUPER::name() ne "") { | |
465 ## Uses the name defined before blessing this object as a | |
466 ## Bio::EnsEMBL::Compara::GenomicAlignTree in the Ortheus pipeline | |
467 $self->{_name} = $self->SUPER::name(); | |
468 } elsif ($self->is_leaf) { | |
469 my $gdb_name; | |
470 if($genomic_align_group->genome_db->name =~ /(.)[^ ]+_(.{3})/) { | |
471 $gdb_name = "${1}${2}"; | |
472 } | |
473 else { | |
474 $gdb_name = $genomic_align_group->genome_db->name(); | |
475 $gdb_name =~ tr/_//; | |
476 } | |
477 $gdb_name = ucfirst($gdb_name); | |
478 $self->{_name} = $gdb_name.'_'.$genomic_align_group->dnafrag->name."_". | |
479 $genomic_align_group->dnafrag_start."_".$genomic_align_group->dnafrag_end."[". | |
480 (($genomic_align_group->dnafrag_strand eq "-1")?"-":"+")."]"; | |
481 } else { | |
482 $self->{_name} = join("-", map { | |
483 my $name = $_->genomic_align_group->genome_db->name; | |
484 if($name =~ /(.)[^ ]+_(.{3})/) { | |
485 $name = "$1$2"; | |
486 } | |
487 else { | |
488 $name =~ tr/_//; | |
489 } | |
490 $name = ucfirst($name); | |
491 $name; | |
492 } @{$self->get_all_leaves})."[".scalar(@{$self->get_all_leaves})."]"; | |
493 } | |
494 } | |
495 | |
496 return $self->{_name}; | |
497 } | |
498 | |
499 | |
500 =head2 get_all_sorted_genomic_align_nodes | |
501 | |
502 Arg [1] : (optional) Bio::EnsEMBL::Compara::GenomicAlignTree $reference_genomic_align_node | |
503 Example : $object->get_all_sorted_genomic_align_nodes($ref_genomic_align_node); | |
504 Example : $nodes = $object->get_all_sorted_genomic_align_nodes(); | |
505 Description : If ref_genomic_align_node is set, sorts the tree based on the | |
506 reference_genomic_align_node | |
507 If ref_genomic_align_node is not set, sorts the tree based on | |
508 the species name | |
509 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree | |
510 Exceptions : none | |
511 Caller : general | |
512 Status : At risk | |
513 | |
514 =cut | |
515 | |
516 sub get_all_sorted_genomic_align_nodes { | |
517 my ($self, $reference_genomic_align_node) = @_; | |
518 my $sorted_genomic_align_nodes = []; | |
519 | |
520 if (!$reference_genomic_align_node and $self->reference_genomic_align_node) { | |
521 $reference_genomic_align_node = $self->reference_genomic_align_node; | |
522 } | |
523 | |
524 my $children = $self->children; | |
525 if (@$children >= 1) { | |
526 $children = [sort _sort_children @$children]; | |
527 push(@$sorted_genomic_align_nodes, @{$children->[0]->get_all_sorted_genomic_align_nodes( | |
528 $reference_genomic_align_node)}); | |
529 push(@$sorted_genomic_align_nodes, $self); | |
530 for (my $i = 1; $i < @$children; $i++) { | |
531 push(@$sorted_genomic_align_nodes, @{$children->[$i]->get_all_sorted_genomic_align_nodes( | |
532 $reference_genomic_align_node)}); | |
533 } | |
534 } elsif (@$children == 0) { | |
535 push(@$sorted_genomic_align_nodes, $self); | |
536 } | |
537 | |
538 return $sorted_genomic_align_nodes; | |
539 } | |
540 | |
541 | |
542 =head2 restrict_between_alignment_positions | |
543 | |
544 Arg[1] : [optional] int $start, refers to the start of the alignment | |
545 Arg[2] : [optional] int $end, refers to the start of the alignment | |
546 Arg[3] : [optional] boolean $skip_empty_GenomicAligns | |
547 Example : none | |
548 Description: restrict this GenomicAlignBlock. It returns a new object unless no | |
549 restriction is needed. In that case, it returns the original unchanged | |
550 object. | |
551 This method uses coordinates relative to the alignment itself. | |
552 For instance if you have an alignment like: | |
553 1 1 2 2 3 | |
554 1 5 0 5 0 5 0 | |
555 AAC--CTTGTGGTA-CTACTT-----ACTTT | |
556 AACCCCTT-TGGTATCTACTTACCTAACTTT | |
557 and you restrict it between 5 and 25, you will get back a | |
558 object containing the following alignment: | |
559 1 1 | |
560 1 5 0 5 | |
561 CTTGTGGTA-CTACTT---- | |
562 CTT-TGGTATCTACTTACCT | |
563 | |
564 See restrict_between_reference_positions() elsewhere in this document | |
565 for an alternative method using absolute genomic coordinates. | |
566 | |
567 NB: This method works only for GenomicAlignBlock which have been | |
568 fetched from the DB as it is adjusting the dnafrag coordinates | |
569 and the cigar_line only and not the actual sequences stored in the | |
570 object if any. If you want to restrict an object with no coordinates | |
571 a simple substr() will do! | |
572 | |
573 Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object | |
574 Exceptions : none | |
575 Caller : general | |
576 Status : At risk | |
577 | |
578 =cut | |
579 | |
580 sub restrict_between_alignment_positions { | |
581 my ($self, $start, $end, $skip_empty_GenomicAligns, $reference_genomic_align) = @_; | |
582 my $genomic_align_tree; | |
583 $genomic_align_tree = $self->copy(); | |
584 $genomic_align_tree->adaptor($self->adaptor); | |
585 foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) { | |
586 $this_node->adaptor($self->adaptor); | |
587 } | |
588 my $final_alignment_length = $end - $start + 1; | |
589 | |
590 #Get all the nodes and restrict but only remove leaves if necessary. Call minimize_tree at the end to | |
591 #remove the internal nodes | |
592 foreach my $this_node (@{$genomic_align_tree->get_all_nodes}) { | |
593 my $genomic_align_group = $this_node->genomic_align_group; | |
594 next if (!$genomic_align_group); | |
595 my $new_genomic_aligns = []; | |
596 my $length = $this_node->length; | |
597 foreach my $this_genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) { | |
598 my $restricted_genomic_align = $this_genomic_align->restrict($start, $end, $length); | |
599 | |
600 if ($genomic_align_tree->reference_genomic_align eq $this_genomic_align) { | |
601 ## Update the reference_genomic_align | |
602 | |
603 $genomic_align_tree->reference_genomic_align($restricted_genomic_align); | |
604 $genomic_align_tree->reference_genomic_align_node($this_node); | |
605 } | |
606 if (!$skip_empty_GenomicAligns or | |
607 $restricted_genomic_align->dnafrag_start <= $restricted_genomic_align->dnafrag_end | |
608 ) { | |
609 ## Always skip composite segments outside of the range of restriction | |
610 ## The cigar_line will contain only X's | |
611 next if ($restricted_genomic_align->cigar_line =~ /^\d*X$/); | |
612 | |
613 #Set the genomic_align_block_id of the restricted genomic_align | |
614 $restricted_genomic_align->genomic_align_block_id($this_genomic_align->genomic_align_block_id); | |
615 #$restricted_genomic_align->genomic_align_block($genomic_align_tree); | |
616 | |
617 push(@$new_genomic_aligns, $restricted_genomic_align); | |
618 } | |
619 } | |
620 if (@$new_genomic_aligns) { | |
621 $genomic_align_group->{genomic_align_array} = undef; | |
622 foreach my $this_genomic_align (@$new_genomic_aligns) { | |
623 $genomic_align_group->add_GenomicAlign($this_genomic_align); | |
624 } | |
625 } else { | |
626 #Only remove leaves. Use minimise_tree to tidy up the internal nodes | |
627 if ($this_node->is_leaf) { | |
628 $this_node->disavow_parent(); | |
629 my $reference_genomic_align = $genomic_align_tree->reference_genomic_align; | |
630 if ($reference_genomic_align) { | |
631 my $reference_genomic_align_node = $genomic_align_tree->reference_genomic_align_node; | |
632 $genomic_align_tree = $genomic_align_tree->minimize_tree(); | |
633 ## Make sure links are not broken after tree minimization | |
634 $genomic_align_tree->reference_genomic_align($reference_genomic_align); | |
635 | |
636 #Set the genomic_align_block_id of the restricted genomic_align | |
637 $genomic_align_tree->reference_genomic_align->genomic_align_block_id($reference_genomic_align->genomic_align_block_id); | |
638 | |
639 #$genomic_align_tree->reference_genomic_align->genomic_align_block($genomic_align_tree); | |
640 $genomic_align_tree->reference_genomic_align_node($reference_genomic_align_node); | |
641 } | |
642 } | |
643 } | |
644 } | |
645 $genomic_align_tree = $genomic_align_tree->minimize_tree(); | |
646 $genomic_align_tree->length($final_alignment_length); | |
647 | |
648 return $genomic_align_tree; | |
649 } | |
650 | |
651 | |
652 =head2 restrict_between_reference_positions | |
653 | |
654 Arg[1] : [optional] int $start, refers to the reference_dnafrag | |
655 Arg[2] : [optional] int $end, refers to the reference_dnafrag | |
656 Arg[3] : [optional] Bio::EnsEMBL::Compara::GenomicAlign $reference_GenomicAlign | |
657 Arg[4] : [optional] boolean $skip_empty_GenomicAligns [ALWAYS FALSE] | |
658 Example : none | |
659 Description: restrict this GenomicAlignBlock. It returns a new object unless no | |
660 restriction is needed. In that case, it returns the original unchanged | |
661 object | |
662 It might be the case that the restricted region coincide with a gap | |
663 in one or several GenomicAligns. By default these GenomicAligns are | |
664 returned with a dnafrag_end equals to its dnafrag_start + 1. For instance, | |
665 a GenomicAlign with dnafrag_start = 12345 and dnafrag_end = 12344 | |
666 correspond to a block which goes on this region from before 12345 to | |
667 after 12344, ie just between 12344 and 12345. You can choose to remove | |
668 these empty GenomicAligns by setting $skip_empty_GenomicAligns to any | |
669 true value. | |
670 Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object in scalar context. In | |
671 list context, returns the previous object and the start and end | |
672 positions of the restriction in alignment coordinates (from 1 to | |
673 alignment_length) | |
674 Exceptions : return undef if reference positions lie outside of the alignment | |
675 Caller : general | |
676 Status : At risk | |
677 | |
678 =cut | |
679 | |
680 =comment | |
681 | |
682 sub restrict_between_reference_positions { | |
683 my ($self, $start, $end, $reference_genomic_align, $skip_empty_GenomicAligns) = @_; | |
684 my $genomic_align_tree; | |
685 | |
686 $reference_genomic_align ||= $self->reference_genomic_align; | |
687 throw("A reference Bio::EnsEMBL::Compara::GenomicAlignTree must be given") | |
688 if (!$reference_genomic_align); | |
689 | |
690 my @restricted_genomic_align_tree_params = $self->SUPER::restrict_between_reference_positions($start, $end, $reference_genomic_align, $skip_empty_GenomicAligns); | |
691 my $restricted_genomic_align_tree = $restricted_genomic_align_tree_params[0]; | |
692 | |
693 #return $self if (!$restricted_genomic_align_tree or $restricted_genomic_align_tree eq $self); | |
694 | |
695 return wantarray ? @restricted_genomic_align_tree_params : $restricted_genomic_align_tree; | |
696 } | |
697 | |
698 =cut | |
699 | |
700 =head2 copy | |
701 | |
702 Arg : none | |
703 Example : my $new_tree = $this_tree->copy() | |
704 Description : Create a copy of this Bio::EnsEMBL::Compara::GenomicAlignTree | |
705 object | |
706 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree | |
707 Exceptions : none | |
708 Caller : general | |
709 Status : At risk | |
710 | |
711 =cut | |
712 | |
713 sub copy { | |
714 my $self = shift(@_); | |
715 my $new_copy; | |
716 | |
717 $new_copy = $self->SUPER::copy(); | |
718 | |
719 $new_copy->genomic_align_group($self->genomic_align_group->copy) if ($self->genomic_align_group); | |
720 | |
721 if ($self->reference_genomic_align_node) { | |
722 my $ref_ga = $self->reference_genomic_align; | |
723 #Need to find the reference_genomic_align in the genomic_align_group | |
724 foreach my $leaf (@{$new_copy->get_all_leaves}) { | |
725 foreach my $gag ($leaf->genomic_align_group) { | |
726 foreach my $ga (@{$gag->genomic_align_array}) { | |
727 if ($ref_ga->dnafrag_id == $ga->dnafrag_id && | |
728 $ref_ga->dnafrag_start == $ga->dnafrag_start && | |
729 $ref_ga->dnafrag_end == $ga->dnafrag_end && | |
730 $ref_ga->dnafrag_strand == $ga->dnafrag_strand) { | |
731 $new_copy->reference_genomic_align($ga); | |
732 last; | |
733 } | |
734 } | |
735 } | |
736 } | |
737 } | |
738 | |
739 | |
740 $new_copy->reference_genomic_align_node($self->reference_genomic_align_node->copy) if ($self->reference_genomic_align_node); | |
741 | |
742 #These are not deep copies | |
743 #$new_copy->reference_genomic_align($self->reference_genomic_align) if ($self->reference_genomic_align); | |
744 #$new_copy->reference_genomic_align_node($self->reference_genomic_align_node) if ($self->reference_genomic_align_node); | |
745 | |
746 #There are lots of bits missing from this copy | |
747 #Still to add? | |
748 #parent_link | |
749 #obj_id_to_link | |
750 | |
751 $new_copy->{_original_strand} = $self->{_original_strand} if (defined $self->{_original_strand}); | |
752 $new_copy->{_parent_id} = $self->{_parent_id} if (defined $self->{_parent_id}); | |
753 $new_copy->{_root_id} = $self->{_root_id} if (defined $self->{_root_id}); | |
754 $new_copy->{_left_node_id} = $self->{_left_node_id} if (defined $self->{_left_node_id}); | |
755 $new_copy->{_right_node_id} = $self->{_right_node_id} if (defined $self->{_right_node_id}); | |
756 $new_copy->{_node_id} = $self->{_node_id} if (defined $self->{_node_id}); | |
757 $new_copy->{_reference_slice} = $self->{_reference_slice} if (defined $self->{_reference_slice}); | |
758 $new_copy->{_reference_slice_start} = $self->{_reference_slice_start} if (defined $self->{_reference_slice_start}); | |
759 $new_copy->{_reference_slice_end} = $self->{_reference_slice_end} if (defined $self->{_reference_slice_end}); | |
760 $new_copy->{_reference_slice_strand} = $self->{_reference_slice_strand} if (defined $self->{_reference_slice_strand}); | |
761 | |
762 return $new_copy; | |
763 } | |
764 | |
765 =head2 print | |
766 | |
767 Arg : none | |
768 Example : print() | |
769 Description : Print the fields in a Bio::EnsEMBL::Compara::GenomicAlignTree | |
770 Returntype : none | |
771 Exceptions : none | |
772 Caller : general | |
773 Status : At risk | |
774 | |
775 =cut | |
776 | |
777 sub print { | |
778 my $self = shift(@_); | |
779 my $level = shift; | |
780 my $reference_genomic_align = shift; | |
781 if (!$level) { | |
782 print STDERR $self->newick_format(), "\n"; | |
783 $reference_genomic_align = ($self->reference_genomic_align or ""); | |
784 } | |
785 $level++; | |
786 my $mark = "- "; | |
787 if (grep {$_ eq $reference_genomic_align} @{$self->get_all_genomic_aligns_for_node}) { | |
788 $mark = "* "; | |
789 } | |
790 print STDERR " " x $level, $mark, | |
791 "[", $self->node_id, "/", ($self->get_original_strand?"+":"-"), "] ", | |
792 $self->genomic_align_group->genome_db->name,":", | |
793 $self->genomic_align_group->dnafrag->name,":", | |
794 $self->genomic_align_group->dnafrag_start,":", | |
795 $self->genomic_align_group->dnafrag_end,":", | |
796 $self->genomic_align_group->dnafrag_strand,":", | |
797 " (", ($self->left_node_id?$self->left_node->node_id."/".$self->left_node->root->node_id:"...."), | |
798 " - ", ($self->right_node_id?$self->right_node->node_id."/".$self->right_node->root->node_id:"...."),")\n"; | |
799 foreach my $this_genomic_align (@{$self->get_all_genomic_aligns_for_node}) { | |
800 if ($this_genomic_align eq $reference_genomic_align) { | |
801 print " " x 8, "* ", $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n"; | |
802 } else { | |
803 print " " x 10, $this_genomic_align->aligned_sequence("+FAKE_SEQ"), "\n"; | |
804 } | |
805 } | |
806 foreach my $node (sort _sort_children @{$self->children}) { | |
807 $node->print($level, $reference_genomic_align); | |
808 } | |
809 $level--; | |
810 } | |
811 | |
812 | |
813 =head2 get_all_nodes_from_leaves_to_this | |
814 | |
815 Arg[1] : Bio::EnsEMBL::Compara::GenomicAlignTree $all_nodes | |
816 Example : my $all_nodes = get_all_nodes_from_leaves_to_this() | |
817 Description : | |
818 Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object | |
819 Exceptions : none | |
820 Caller : general | |
821 Status : At risk | |
822 | |
823 =cut | |
824 | |
825 sub get_all_nodes_from_leaves_to_this { | |
826 my $self = shift(@_); | |
827 my $all_nodes = (shift or []); | |
828 foreach my $node (sort _sort_children @{$self->children}) { | |
829 $all_nodes = $node->get_all_nodes_from_leaves_to_this($all_nodes); | |
830 } | |
831 push(@$all_nodes, $self); | |
832 return $all_nodes; | |
833 } | |
834 | |
835 | |
836 =head2 get_all_leaves | |
837 | |
838 Title : get_all_leaves | |
839 Usage : my @leaves = @{$tree->get_all_leaves}; | |
840 Function: searching from the given starting node, searches and creates list | |
841 of all leaves in this subtree and returns by reference. | |
842 This method overwrites the parent method because it sorts | |
843 the leaves according to their node_id. Here, we use this method | |
844 to get all leaves in another sorting function. Not only it doesn't | |
845 make much sense to sort something that will be sorted again, but | |
846 it can also produce some Perl errors as sort methods uses $a and | |
847 $b which are package global variables. | |
848 Example : | |
849 Returns : reference to list of NestedSet objects (all leaves) | |
850 Args : none | |
851 Status : At risk | |
852 | |
853 =cut | |
854 | |
855 sub get_all_leaves { | |
856 my $self = shift; | |
857 | |
858 my $leaves = {}; | |
859 $self->_recursive_get_all_leaves($leaves); | |
860 my @leaf_list = values(%{$leaves}); | |
861 | |
862 return \@leaf_list; | |
863 } | |
864 | |
865 | |
866 =head2 _sort_children | |
867 | |
868 Arg : none | |
869 Example : sort _sort_children @$children | |
870 Description : sort function for sorting the nodes of a Bio::EnsEMBL::Compara::GenomicAlignTree object | |
871 Returntype : int (-1,0,1) | |
872 Exceptions : none | |
873 Caller : general | |
874 Status : At risk | |
875 | |
876 =cut | |
877 | |
878 sub _sort_children { | |
879 my $reference_genomic_align_node; | |
880 | |
881 if (defined ($a->root) && defined($b->root) && $a->root eq $b->root and $a->root->reference_genomic_align_node) { | |
882 $reference_genomic_align_node = $a->root->reference_genomic_align_node; | |
883 } | |
884 | |
885 ## Reference GenomicAlign based sorting | |
886 if ($reference_genomic_align_node) { | |
887 if (grep {$_ eq $reference_genomic_align_node} @{$a->get_all_leaves}) { | |
888 return -1; | |
889 } elsif (grep {$_ eq $reference_genomic_align_node} @{$b->get_all_leaves}) { | |
890 return 1; | |
891 } | |
892 } | |
893 | |
894 ## Species name based sorting | |
895 my $species_a = $a->_name_for_sorting; | |
896 my $species_b = $b->_name_for_sorting; | |
897 | |
898 return $species_a cmp $species_b; | |
899 } | |
900 | |
901 =head2 _name_for_sorting | |
902 | |
903 Arg : none | |
904 Example : my $species_a = $a->_name_for_sorting; | |
905 Description : if the node is a leaf, create a name based on the species | |
906 name, dnafrag name, group_id and the start position. If the | |
907 node is an internal node, create a name based on the species | |
908 name, dnafrag name and the start position | |
909 Returntype : string | |
910 Exceptions : none | |
911 Caller : _sort_children | |
912 Status : At risk | |
913 | |
914 =cut | |
915 | |
916 sub _name_for_sorting { | |
917 my ($self) = @_; | |
918 my $name; | |
919 | |
920 if ($self->is_leaf) { | |
921 $name = sprintf("%s.%s.%s.%020d", | |
922 $self->genomic_align_group->genome_db->name, | |
923 $self->genomic_align_group->dnafrag->name, | |
924 ($self->genomic_align_group->dbID or | |
925 $self->genomic_align_group->{original_dbID} or 0), | |
926 $self->genomic_align_group->dnafrag_start); | |
927 } else { | |
928 $name = join(" - ", sort map {sprintf("%s.%s.%020d", | |
929 $_->genomic_align_group->genome_db->name, | |
930 $_->genomic_align_group->dnafrag->name, | |
931 $_->genomic_align_group->dnafrag_start)} @{$self->get_all_leaves}); | |
932 } | |
933 | |
934 return $name; | |
935 } | |
936 | |
937 =head2 reverse_complement | |
938 | |
939 Args : none | |
940 Example : none | |
941 Description: reverse complement the tree, | |
942 modifying dnafrag_strand and cigar_line of each GenomicAlign in consequence | |
943 Returntype : none | |
944 Exceptions : none | |
945 Caller : general | |
946 Status : At risk | |
947 | |
948 =cut | |
949 | |
950 sub reverse_complement { | |
951 my ($self) = @_; | |
952 | |
953 if (defined($self->{_original_strand})) { | |
954 $self->{_original_strand} = 1 - $self->{_original_strand}; | |
955 } else { | |
956 $self->{_original_strand} = 0; | |
957 } | |
958 | |
959 foreach my $this_node (@{$self->get_all_nodes}) { | |
960 my $genomic_align_group = $this_node->genomic_align_group; | |
961 next if (!$genomic_align_group); | |
962 my $gas = $genomic_align_group->get_all_GenomicAligns; | |
963 foreach my $ga (@{$gas}) { | |
964 $ga->reverse_complement; | |
965 } | |
966 } | |
967 } | |
968 | |
969 sub length { | |
970 my ($self, $length) = @_; | |
971 | |
972 if (defined($length)) { | |
973 $self->{'length'} = $length; | |
974 } elsif (!defined($self->{'length'})) { | |
975 # Try to get the ID from other sources... | |
976 if (defined($self->{'adaptor'}) and defined($self->dbID)) { | |
977 # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object | |
978 $self->adaptor->retrieve_all_direct_attributes($self); | |
979 } elsif (@{$self->get_all_genomic_aligns_for_node} and $self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ")) { | |
980 $self->{'length'} = CORE::length($self->get_all_genomic_aligns_for_node->[0]->aligned_sequence("+FAKE_SEQ")); | |
981 } else { | |
982 foreach my $this_node (@{$self->get_all_nodes}) { | |
983 my $genomic_align_group = $this_node->genomic_align_group; | |
984 next if (!$genomic_align_group); | |
985 $self->{'length'} = CORE::length($genomic_align_group->get_all_GenomicAligns->[0]->aligned_sequence("+FAKE_SEQ")); | |
986 last; | |
987 } | |
988 } | |
989 } | |
990 return $self->{'length'}; | |
991 } | |
992 | |
993 =head2 alignment_strings | |
994 | |
995 Arg [1] : none | |
996 Example : $genomic_align_tree->alignment_strings | |
997 Description: Returns the alignment string of all the sequences in the | |
998 alignment | |
999 Returntype : array reference containing several strings | |
1000 Exceptions : none | |
1001 Caller : general | |
1002 Status : Stable | |
1003 | |
1004 =cut | |
1005 | |
1006 sub alignment_strings { | |
1007 my ($self) = @_; | |
1008 my $alignment_strings = []; | |
1009 | |
1010 foreach my $this_node (@{$self->get_all_nodes}) { | |
1011 my $genomic_align_group = $this_node->genomic_align_group; | |
1012 next if (!$genomic_align_group); | |
1013 foreach my $genomic_align (@{$genomic_align_group->get_all_GenomicAligns}) { | |
1014 push(@$alignment_strings, $genomic_align->aligned_sequence); | |
1015 } | |
1016 } | |
1017 | |
1018 return $alignment_strings; | |
1019 } | |
1020 | |
1021 =head2 method_link_species_set | |
1022 | |
1023 Arg [1] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set | |
1024 Example : $method_link_species_set = $genomic_align_tree->method_link_species_set; | |
1025 Description: Getter for attribute method_link_species_set. Takes this from the first Bio::EnsEMBL::Compara::GenomicAlign | |
1026 object | |
1027 Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object | |
1028 Exceptions : thrown if $method_link_species_set is not a | |
1029 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object | |
1030 Caller : general | |
1031 Status : Stable | |
1032 | |
1033 =cut | |
1034 | |
1035 sub method_link_species_set { | |
1036 my ($self) = @_; | |
1037 | |
1038 my $method_link_species_set = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set; | |
1039 | |
1040 throw("$method_link_species_set is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object") | |
1041 unless ($method_link_species_set->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet")); | |
1042 | |
1043 return $method_link_species_set; | |
1044 } | |
1045 | |
1046 =head2 method_link_species_set_id | |
1047 | |
1048 Arg [1] : integer $method_link_species_set_id | |
1049 Example : $method_link_species_set_id = $genomic_align_tree->method_link_species_set_id; | |
1050 Description: Getter for the attribute method_link_species_set_id. Takes this from the first | |
1051 Bio::EnsEMBL::Compara::GenomicAlign object | |
1052 Returntype : integer | |
1053 Caller : object::methodname | |
1054 Status : Stable | |
1055 | |
1056 =cut | |
1057 | |
1058 sub method_link_species_set_id { | |
1059 my ($self) = @_; | |
1060 | |
1061 my $method_link_species_set_id = $self->get_all_leaves->[0]->genomic_align_group->genomic_align_array->[0]->method_link_species_set->dbID; | |
1062 | |
1063 return $method_link_species_set_id; | |
1064 } | |
1065 | |
1066 #sub DESTROY { | |
1067 # my ($self) = @_; | |
1068 # $self->release_tree unless ($self->{_parent_link}); | |
1069 # } | |
1070 | |
1071 1; |