0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::EnsEMBL::Compara::GenomicAlignBlock - Alignment of two or more pieces of genomic DNA
|
|
22
|
|
23 =head1 SYNOPSIS
|
|
24
|
|
25 use Bio::EnsEMBL::Compara::GenomicAlignBlock;
|
|
26
|
|
27 my $genomic_align_block = new Bio::EnsEMBL::Compara::GenomicAlignBlock(
|
|
28 -adaptor => $genomic_align_block_adaptor,
|
|
29 -method_link_species_set => $method_link_species_set,
|
|
30 -score => 56.2,
|
|
31 -length => 1203,
|
|
32 -genomic_align_array => [$genomic_align1, $genomic_align2...]
|
|
33 );
|
|
34
|
|
35 SET VALUES
|
|
36 $genomic_align_block->adaptor($gen_ali_blk_adaptor);
|
|
37 $genomic_align_block->dbID(12);
|
|
38 $genomic_align_block->method_link_species_set($method_link_species_set);
|
|
39 $genomic_align_block->reference_genomic_align_id(35123);
|
|
40 $genomic_align_block->genomic_align_array([$genomic_align1, $genomic_align2]);
|
|
41 $genomic_align_block->reference_slice($reference_slice);
|
|
42 $genomic_align_block->reference_slice_start(1035);
|
|
43 $genomic_align_block->reference_slice_end(1283);
|
|
44 $genomic_align_block->score(56.2);
|
|
45 $genomic_align_block->length(562);
|
|
46
|
|
47 GET VALUES
|
|
48 my $genomic_align_block_adaptor = $genomic_align_block->adaptor();
|
|
49 my $dbID = $genomic_align_block->dbID();
|
|
50 my $method_link_species_set = $genomic_align_block->method_link_species_set;
|
|
51 my $genomic_aligns = $genomic_align_block->genomic_align_array();
|
|
52 my $reference_genomic_align = $genomic_align_block->reference_genomic_align();
|
|
53 my $non_reference_genomic_aligns = $genomic_align_block->get_all_non_reference_genomic_aligns();
|
|
54 my $reference_slice = $genomic_align_block->reference_slice();
|
|
55 my $reference_slice_start = $genomic_align_block->reference_slice_start();
|
|
56 my $reference_slice_end = $genomic_align_block->reference_slice_end();
|
|
57 my $score = $genomic_align_block->score();
|
|
58 my $length = $genomic_align_block->length;
|
|
59 my alignment_strings = $genomic_align_block->alignment_strings;
|
|
60 my $genomic_align_block_is_on_the_original_strand =
|
|
61 $genomic_align_block->get_original_strand;
|
|
62
|
|
63 =head1 DESCRIPTION
|
|
64
|
|
65 The GenomicAlignBlock object stores information about an alignment comprising of two or more pieces of genomic DNA.
|
|
66
|
|
67
|
|
68 =head1 OBJECT ATTRIBUTES
|
|
69
|
|
70 =over
|
|
71
|
|
72 =item dbID
|
|
73
|
|
74 corresponds to genomic_align_block.genomic_align_block_id
|
|
75
|
|
76 =item adaptor
|
|
77
|
|
78 Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor object to access DB
|
|
79
|
|
80 =item method_link_species_set_id
|
|
81
|
|
82 corresponds to method_link_species_set.method_link_species_set_id (external ref.)
|
|
83
|
|
84 =item method_link_species_set
|
|
85
|
|
86 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object corresponding to method_link_species_set_id
|
|
87
|
|
88 =item score
|
|
89
|
|
90 corresponds to genomic_align_block.score
|
|
91
|
|
92 =item perc_id
|
|
93
|
|
94 corresponds to genomic_align_block.perc_id
|
|
95
|
|
96 =item length
|
|
97
|
|
98 corresponds to genomic_align_block.length
|
|
99
|
|
100 =item group_id
|
|
101
|
|
102 corresponds to the genomic_align_block.group_id
|
|
103
|
|
104 =item reference_genomic_align_id
|
|
105
|
|
106 When looking for genomic alignments in a given slice or dnafrag, the
|
|
107 reference_genomic_align corresponds to the Bio::EnsEMBL::Compara::GenomicAlign
|
|
108 included in the starting slice or dnafrag. The reference_genomic_align_id is
|
|
109 the dbID corresponding to the reference_genomic_align. All remaining
|
|
110 Bio::EnsEMBL::Compara::GenomicAlign objects included in the
|
|
111 Bio::EnsEMBL::Compara::GenomicAlignBlock are the non_reference_genomic_aligns.
|
|
112
|
|
113 =item reference_genomic_align
|
|
114
|
|
115 Bio::EnsEMBL::Compara::GenomicAling object corresponding to reference_genomic_align_id
|
|
116
|
|
117 =item genomic_align_array
|
|
118
|
|
119 listref of Bio::EnsEMBL::Compara::GenomicAlign objects corresponding to this
|
|
120 Bio::EnsEMBL::Compara::GenomicAlignBlock object
|
|
121
|
|
122 =item reference_slice
|
|
123
|
|
124 This is the Bio::EnsEMBL::Slice object used as argument to the
|
|
125 Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor->fetch_all_by_Slice method.
|
|
126
|
|
127 =item reference_slice_start
|
|
128
|
|
129 starting position in the coordinates system defined by the reference_slice
|
|
130
|
|
131 =item reference_slice_end
|
|
132
|
|
133 ending position in the coordinates system defined by the reference_slice
|
|
134
|
|
135 =back
|
|
136
|
|
137 =head1 APPENDIX
|
|
138
|
|
139 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
140
|
|
141 =cut
|
|
142
|
|
143
|
|
144 # Let the code begin...
|
|
145
|
|
146
|
|
147 package Bio::EnsEMBL::Compara::GenomicAlignBlock;
|
|
148 use strict;
|
|
149
|
|
150 # Object preamble
|
|
151 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
152 use Bio::EnsEMBL::Utils::Exception qw(throw warning info deprecate verbose);
|
|
153 use Bio::EnsEMBL::Compara::GenomicAlign;
|
|
154 use Bio::SimpleAlign;
|
|
155 use Bio::EnsEMBL::Compara::BaseGenomicAlignSet;
|
|
156
|
|
157 our @ISA = qw(Bio::EnsEMBL::Compara::BaseGenomicAlignSet);
|
|
158
|
|
159 =head2 new (CONSTRUCTOR)
|
|
160
|
|
161 Arg [-DBID] : (opt.) int $dbID (the database internal ID for this object)
|
|
162 Arg [-ADAPTOR]
|
|
163 : (opt.) Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor $adaptor
|
|
164 (the adaptor for connecting to the database)
|
|
165 Arg [-METHOD_LINK_SPECIES_SET]
|
|
166 : (opt.) Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $mlss
|
|
167 (this defines the type of alignment and the set of species used
|
|
168 to get this GenomicAlignBlock)
|
|
169 Arg [-METHOD_LINK_SPECIES_SET_ID]
|
|
170 : (opt.) int $mlss_id (the database internal ID for the $mlss)
|
|
171 Arg [-SCORE]: (opt.) float $score (the score of this alignment)
|
|
172 Arg [-PERC_ID]
|
|
173 : (opt.) int $perc_id (the percentage of identity, only used for pairwise)
|
|
174 Arg [-LENGTH]
|
|
175 : (opt.) int $length (the length of this alignment, taking into account
|
|
176 gaps and all)
|
|
177 Arg [-GROUP_ID]
|
|
178 : (opt.) int $group)id (the group ID for this alignment)
|
|
179 Arg [-REFERENCE_GENOMIC_ALIGN]
|
|
180 : (opt.) Bio::EnsEMBL::Compara::GenomicAlign $reference_genomic_align (the
|
|
181 Bio::EnsEMBL::Compara::GenomicAlign corresponding to the requesting
|
|
182 Bio::EnsEMBL::Compara::DnaFrag or Bio::EnsEMBL::Slice when this
|
|
183 Bio::EnsEMBL::Compara::GenomicAlignBlock has been fetched from a
|
|
184 Bio::EnsEMBL::Compara::DnaFrag or a Bio::EnsEMBL::Slice)
|
|
185 Arg [-REFERENCE_GENOMIC_ALIGN_ID]
|
|
186 : (opt.) int $reference_genomic_align (the database internal ID of the
|
|
187 $reference_genomic_align)
|
|
188 Arg [-GENOMIC_ALIGN_ARRAY]
|
|
189 : (opt.) array_ref $genomic_aligns (a reference to the array of
|
|
190 Bio::EnsEMBL::Compara::GenomicAlign objects corresponding to this
|
|
191 Bio::EnsEMBL::Compara::GenomicAlignBlock object)
|
|
192 Example : my $genomic_align_block =
|
|
193 new Bio::EnsEMBL::Compara::GenomicAlignBlock(
|
|
194 -adaptor => $gaba,
|
|
195 -method_link_species_set => $method_link_species_set,
|
|
196 -score => 56.2,
|
|
197 -length => 1203,
|
|
198 -group_id => 1234,
|
|
199 -genomic_align_array => [$genomic_align1, $genomic_align2...]
|
|
200 );
|
|
201 Description: Creates a new GenomicAlignBlock object
|
|
202 Returntype : Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlock
|
|
203 Exceptions : none
|
|
204 Caller : general
|
|
205 Status : Stable
|
|
206
|
|
207 =cut
|
|
208
|
|
209 sub new {
|
|
210 my($class, @args) = @_;
|
|
211
|
|
212 my $self = {};
|
|
213 bless $self,$class;
|
|
214
|
|
215 my ($adaptor, $dbID, $method_link_species_set, $method_link_species_set_id,
|
|
216 $score, $perc_id, $length, $group_id, $level_id, $reference_genomic_align, $reference_genomic_align_id,
|
|
217 $genomic_align_array, $starting_genomic_align_id, $ungapped_genomic_align_blocks) =
|
|
218 rearrange([qw(
|
|
219 ADAPTOR DBID METHOD_LINK_SPECIES_SET METHOD_LINK_SPECIES_SET_ID
|
|
220 SCORE PERC_ID LENGTH GROUP_ID LEVEL_ID REFERENCE_GENOMIC_ALIGN REFERENCE_GENOMIC_ALIGN_ID
|
|
221 GENOMIC_ALIGN_ARRAY STARTING_GENOMIC_ALIGN_ID UNGAPPED_GENOMIC_ALIGN_BLOCKS)],
|
|
222 @args);
|
|
223
|
|
224 $self->{_original_strand} = 1;
|
|
225
|
|
226 if (defined($ungapped_genomic_align_blocks)) {
|
|
227 return $self->_create_from_a_list_of_ungapped_genomic_align_blocks($ungapped_genomic_align_blocks);
|
|
228 }
|
|
229 $self->adaptor($adaptor) if (defined ($adaptor));
|
|
230 $self->dbID($dbID) if (defined ($dbID));
|
|
231 $self->method_link_species_set($method_link_species_set) if (defined ($method_link_species_set));
|
|
232 $self->method_link_species_set_id($method_link_species_set_id)
|
|
233 if (defined ($method_link_species_set_id));
|
|
234 $self->score($score) if (defined ($score));
|
|
235 $self->perc_id($perc_id) if (defined ($perc_id));
|
|
236 $self->length($length) if (defined ($length));
|
|
237 $self->group_id($group_id) if (defined ($group_id));
|
|
238 $self->level_id($level_id) if (defined ($level_id));
|
|
239 $self->reference_genomic_align($reference_genomic_align)
|
|
240 if (defined($reference_genomic_align));
|
|
241 $self->reference_genomic_align_id($reference_genomic_align_id)
|
|
242 if (defined($reference_genomic_align_id));
|
|
243 $self->genomic_align_array($genomic_align_array) if (defined($genomic_align_array));
|
|
244
|
|
245 $self->starting_genomic_align_id($starting_genomic_align_id) if (defined($starting_genomic_align_id));
|
|
246
|
|
247 return $self;
|
|
248 }
|
|
249
|
|
250 =head2 new_fast
|
|
251
|
|
252 Arg [1] : hash reference $hashref
|
|
253 Example : none
|
|
254 Description: This is an ultra fast constructor which requires knowledge of
|
|
255 the objects internals to be used.
|
|
256 Returntype :
|
|
257 Exceptions : none
|
|
258 Caller :
|
|
259 Status : Stable
|
|
260
|
|
261 =cut
|
|
262
|
|
263 sub new_fast {
|
|
264 my $class = shift;
|
|
265 my $hashref = shift;
|
|
266
|
|
267 return bless $hashref, $class;
|
|
268 }
|
|
269
|
|
270
|
|
271 =head2 adaptor
|
|
272
|
|
273 Arg [1] : Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor $adaptor
|
|
274 Example : my $gen_ali_blk_adaptor = $genomic_align_block->adaptor();
|
|
275 Example : $genomic_align_block->adaptor($gen_ali_blk_adaptor);
|
|
276 Description: Getter/Setter for the adaptor this object uses for database
|
|
277 interaction.
|
|
278 Returntype : Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor object
|
|
279 Exceptions : thrown if $adaptor is not a
|
|
280 Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor object
|
|
281 Caller : general
|
|
282 Status : Stable
|
|
283
|
|
284 =cut
|
|
285
|
|
286 sub adaptor {
|
|
287 my ($self, $adaptor) = @_;
|
|
288
|
|
289 if (defined($adaptor)) {
|
|
290 throw("$adaptor is not a Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor object")
|
|
291 unless ($adaptor->isa("Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor"));
|
|
292 $self->{'adaptor'} = $adaptor;
|
|
293 }
|
|
294
|
|
295 return $self->{'adaptor'};
|
|
296 }
|
|
297
|
|
298
|
|
299 =head2 dbID
|
|
300
|
|
301 Arg [1] : integer $dbID
|
|
302 Example : my $dbID = $genomic_align_block->dbID();
|
|
303 Example : $genomic_align_block->dbID(12);
|
|
304 Description: Getter/Setter for the attribute dbID
|
|
305 Returntype : integer
|
|
306 Exceptions : none
|
|
307 Caller : general
|
|
308 Status : Stable
|
|
309
|
|
310 =cut
|
|
311
|
|
312 sub dbID {
|
|
313 my ($self, $dbID) = @_;
|
|
314
|
|
315 if (defined($dbID)) {
|
|
316 $self->{'dbID'} = $dbID;
|
|
317 }
|
|
318
|
|
319 return $self->{'dbID'};
|
|
320 }
|
|
321
|
|
322
|
|
323 =head2 method_link_species_set
|
|
324
|
|
325 Arg [1] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
|
|
326 Example : $method_link_species_set = $genomic_align_block->method_link_species_set;
|
|
327 Example : $genomic_align_block->method_link_species_set($method_link_species_set);
|
|
328 Description: get/set for attribute method_link_species_set. If no
|
|
329 argument is given, the method_link_species_set is not defined but
|
|
330 both the method_link_species_set_id and the adaptor are, it tries
|
|
331 to fetch the data using the method_link_species_set_id
|
|
332 Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
|
|
333 Exceptions : thrown if $method_link_species_set is not a
|
|
334 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
|
|
335 Caller : general
|
|
336 Status : Stable
|
|
337
|
|
338 =cut
|
|
339
|
|
340 sub method_link_species_set {
|
|
341 my ($self, $method_link_species_set) = @_;
|
|
342
|
|
343 if (defined($method_link_species_set)) {
|
|
344 throw("$method_link_species_set is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
|
|
345 unless ($method_link_species_set->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
|
|
346 $self->{'method_link_species_set'} = $method_link_species_set;
|
|
347 if ($self->{'method_link_species_set_id'}) {
|
|
348 $self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID;
|
|
349 }
|
|
350 ## Update the MethodLinkSpeciesSet for the GenomicAligns included in this GenomicAlignBlock
|
|
351 if (defined($self->{genomic_align_array})) {
|
|
352 foreach my $this_genomic_align (@{$self->{genomic_align_array}}) {
|
|
353 $this_genomic_align->method_link_species_set($method_link_species_set);
|
|
354 }
|
|
355 }
|
|
356
|
|
357 } elsif (!defined($self->{'method_link_species_set'}) and defined($self->{'adaptor'})
|
|
358 and defined($self->method_link_species_set_id)) {
|
|
359 # Try to get object from ID. Use method_link_species_set_id function and not the attribute in the <if>
|
|
360 # clause because the attribute can be retrieved from other sources if it has not been already defined.
|
|
361 my $mlssa = $self->adaptor->db->get_MethodLinkSpeciesSetAdaptor;
|
|
362 $self->{'method_link_species_set'} = $mlssa->fetch_by_dbID($self->{'method_link_species_set_id'})
|
|
363 }
|
|
364
|
|
365 return $self->{'method_link_species_set'};
|
|
366 }
|
|
367
|
|
368
|
|
369 =head2 method_link_species_set_id
|
|
370
|
|
371 Arg [1] : integer $method_link_species_set_id
|
|
372 Example : $method_link_species_set_id = $genomic_align_block->method_link_species_set_id;
|
|
373 Example : $genomic_align_block->method_link_species_set_id(3);
|
|
374 Description: Getter/Setter for the attribute method_link_species_set_id. If no
|
|
375 argument is given, the method_link_species_set_id is not defined but
|
|
376 the method_link_species_set is, it tries to get the data from the
|
|
377 method_link_species_set object. If this fails, it tries to get and set
|
|
378 all the direct attributes from the database using the dbID of the
|
|
379 Bio::Ensembl::Compara::GenomicAlignBlock object.
|
|
380 Returntype : integer
|
|
381 Exceptions : thrown if $method_link_species_set_id does not match a previously defined
|
|
382 method_link_species_set
|
|
383 Caller : object::methodname
|
|
384 Status : Stable
|
|
385
|
|
386 =cut
|
|
387
|
|
388 sub method_link_species_set_id {
|
|
389 my ($self, $method_link_species_set_id) = @_;
|
|
390
|
|
391 if (defined($method_link_species_set_id)) {
|
|
392 $self->{'method_link_species_set_id'} = $method_link_species_set_id;
|
|
393 if (defined($self->{'method_link_species_set'}) and $self->{'method_link_species_set_id'}) {
|
|
394 $self->{'method_link_species_set'} = undef;
|
|
395 }
|
|
396 ## Update the MethodLinkSpeciesSet for the GenomicAligns included in this GenomicAlignBlock
|
|
397 if (defined($self->{genomic_align_array})) {
|
|
398 foreach my $this_genomic_align (@{$self->{genomic_align_array}}) {
|
|
399 $this_genomic_align->method_link_species_set_id($method_link_species_set_id);
|
|
400 }
|
|
401 }
|
|
402
|
|
403 } elsif (!($self->{'method_link_species_set_id'})) {
|
|
404 # Try to get the ID from other sources...
|
|
405 if (defined($self->{'method_link_species_set'})) {
|
|
406 # ...from the object
|
|
407 $self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID;
|
|
408 } elsif (defined($self->{'adaptor'}) and defined($self->dbID)) {
|
|
409 # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object
|
|
410 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
411 }
|
|
412 }
|
|
413
|
|
414 return $self->{'method_link_species_set_id'};
|
|
415 }
|
|
416
|
|
417 =head2 starting_genomic_align_id (DEPRECATED)
|
|
418
|
|
419 DEPRECATED! Use Bio::EnsEMBL::Compara::GenomicAlignBlock->reference_genomic_align_id() method instead
|
|
420
|
|
421 Arg [1] : integer $reference_genomic_align_id
|
|
422 Example : $genomic_align_block->starting_genomic_align_id(4321);
|
|
423 Description: set for attribute reference_genomic_align_id. A value of 0 will set the
|
|
424 reference_genomic_align_id attribute to undef. When looking for genomic
|
|
425 alignments in a given slice or dnafrag, the reference_genomic_align
|
|
426 corresponds to the Bio::EnsEMBL::Compara::GenomicAlign included in the
|
|
427 starting slice or dnafrag. The reference_genomic_align_id is the dbID
|
|
428 corresponding to the reference_genomic_align. All remaining
|
|
429 Bio::EnsEMBL::Compara::GenomicAlign objects included in the
|
|
430 Bio::EnsEMBL::Compara::GenomicAlignBlock are the non_reference_genomic_aligns.
|
|
431 Returntype : none
|
|
432 Exceptions : throw if $reference_genomic_align_id id not a postive number
|
|
433 Caller : $genomic_align_block->starting_genomic_align_id(int)
|
|
434
|
|
435 =cut
|
|
436
|
|
437 sub starting_genomic_align_id {
|
|
438 my $self = shift;
|
|
439 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignBlock->reference_genomic_align_id() method instead");
|
|
440 return $self->reference_genomic_align_id(@_);
|
|
441 }
|
|
442
|
|
443 =head2 starting_genomic_align (DEPRECATED)
|
|
444
|
|
445 DEPRECATED! Use Bio::EnsEMBL::Compara::GenomicAlignBlock->reference_genomic_align() method instead
|
|
446
|
|
447 Arg [1] : (none)
|
|
448 Example : $genomic_align = $genomic_align_block->starting_genomic_align();
|
|
449 Description: get the reference_genomic_align. When looking for genomic alignments in
|
|
450 a given slice or dnafrag, the reference_genomic_align corresponds to the
|
|
451 Bio::EnsEMBL::Compara::GenomicAlign included in the starting slice or
|
|
452 dnafrag. The reference_genomic_align_id is the dbID corresponding to the
|
|
453 reference_genomic_align. All remaining Bio::EnsEMBL::Compara::GenomicAlign
|
|
454 objects included in the Bio::EnsEMBL::Compara::GenomicAlignBlock are the
|
|
455 non_reference_genomic_aligns.
|
|
456 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
|
|
457 Exceptions : warns if no reference_genomic_align_id has been set and returns a ref.
|
|
458 to an empty array
|
|
459 Exceptions : warns if no genomic_align_array has been set and returns a ref.
|
|
460 to an empty array
|
|
461 Exceptions : throw if reference_genomic_align_id does not match any of the
|
|
462 Bio::EnsEMBL::Compara::GenomicAlign objects in the genomic_align_array
|
|
463 Caller : $genomic_align_block->starting_genomic_align()
|
|
464
|
|
465 =cut
|
|
466
|
|
467 sub starting_genomic_align {
|
|
468 my $self = shift;
|
|
469 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignBlock->reference_genomic_align() method instead");
|
|
470 return $self->reference_genomic_align(@_);
|
|
471 }
|
|
472
|
|
473
|
|
474 =head2 resulting_genomic_aligns
|
|
475
|
|
476 DEPRECATED! Use Bio::EnsEMBL::Compara::GenomicAlignBlock->get_all_non_reference_genomic_align()
|
|
477 method instead
|
|
478
|
|
479 Arg [1] : (none)
|
|
480 Example : $genomic_aligns = $genomic_align_block->resulting_genomic_aligns();
|
|
481 Description: get the all the non_reference_genomic_aligns. When looking for genomic
|
|
482 alignments in a given slice or dnafrag, the reference_genomic_align
|
|
483 corresponds to the Bio::EnsEMBL::Compara::GenomicAlign included in the
|
|
484 reference slice or dnafrag. The reference_genomic_align_id is the dbID
|
|
485 corresponding to the reference_genomic_align. All remaining
|
|
486 Bio::EnsEMBL::Compara::GenomicAlign objects included in the
|
|
487 Bio::EnsEMBL::Compara::GenomicAlignBlock are the
|
|
488 non_reference_genomic_aligns.
|
|
489 Returntype : a ref. to an array of Bio::EnsEMBL::Compara::GenomicAlign objects
|
|
490 Exceptions : warns if no reference_genomic_align_id has been set and returns a ref.
|
|
491 to an empty array
|
|
492 Exceptions : warns if no genomic_align_array has been set and returns a ref.
|
|
493 to an empty array
|
|
494 Caller : $genomic_align_block->resulting_genomic_aligns()
|
|
495
|
|
496 =cut
|
|
497
|
|
498 sub resulting_genomic_aligns {
|
|
499 my ($self) = @_;
|
|
500 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignBlock->get_all_non_reference_genomic_aligns() method instead");
|
|
501 return $self->get_all_non_reference_genomic_aligns(@_);
|
|
502 }
|
|
503
|
|
504
|
|
505 =head2 reference_genomic_align
|
|
506
|
|
507 Arg [1] : (optional) Bio::EnsEMBL::Compara::GenomicAlign $reference_genomic_align
|
|
508 Example : $genomic_align_block->reference_genomic_align($reference_genomic_align);
|
|
509 Example : $genomic_align = $genomic_align_block->reference_genomic_align();
|
|
510 Description: get/set the reference_genomic_align. When looking for genomic alignments in
|
|
511 a given slice or dnafrag, the reference_genomic_align corresponds to the
|
|
512 Bio::EnsEMBL::Compara::GenomicAlign included in the starting slice or
|
|
513 dnafrag. The reference_genomic_align_id is the dbID corresponding to the
|
|
514 reference_genomic_align. All remaining Bio::EnsEMBL::Compara::GenomicAlign
|
|
515 objects included in the Bio::EnsEMBL::Compara::GenomicAlignBlock are the
|
|
516 non_reference_genomic_aligns.
|
|
517 Synchronises reference_genomic_align and reference_genomic_align_id
|
|
518 attributes.
|
|
519 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
|
|
520 Exceptions : throw if reference_genomic_align is not a Bio::EnsEMBL::Compara::GenomicAlign
|
|
521 object
|
|
522 Exceptions : throw if reference_genomic_align_id does not match any of the
|
|
523 Bio::EnsEMBL::Compara::GenomicAlign objects in the genomic_align_array
|
|
524 Caller : $genomic_align_block->reference_genomic_align()
|
|
525 Status : Stable
|
|
526
|
|
527 =cut
|
|
528
|
|
529 sub reference_genomic_align {
|
|
530 my ($self, $reference_genomic_align) = @_;
|
|
531
|
|
532 if (defined($reference_genomic_align)) {
|
|
533 throw("[$reference_genomic_align] must be a Bio::EnsEMBL::Compara::GenomicAlign object")
|
|
534 unless($reference_genomic_align and ref($reference_genomic_align) and
|
|
535 $reference_genomic_align->isa("Bio::EnsEMBL::Compara::GenomicAlign"));
|
|
536 $self->{'reference_genomic_align'} = $reference_genomic_align;
|
|
537
|
|
538 ## Synchronises reference_genomic_align and reference_genomic_align_id attributes
|
|
539 if (defined($self->{'reference_genomic_align'}->dbID)) {
|
|
540 $self->{'reference_genomic_align_id'} = $self->{'reference_genomic_align'}->dbID;
|
|
541 }
|
|
542
|
|
543 ## Try to get data from other sources...
|
|
544 } elsif (!defined($self->{'reference_genomic_align'})) {
|
|
545
|
|
546 ## ...from the reference_genomic_align_id attribute
|
|
547 if (defined($self->{'reference_genomic_align_id'}) and @{$self->get_all_GenomicAligns}) {
|
|
548 my $reference_genomic_align_id = $self->{'reference_genomic_align_id'};
|
|
549 foreach my $this_genomic_align (@{$self->get_all_GenomicAligns}) {
|
|
550 if ($this_genomic_align->dbID == $reference_genomic_align_id) {
|
|
551 $self->{'reference_genomic_align'} = $this_genomic_align;
|
|
552 return $this_genomic_align;
|
|
553 }
|
|
554 }
|
|
555 throw("[$self] Cannot found Bio::EnsEMBL::Compara::GenomicAlign::reference_genomic_align_id".
|
|
556 " ($reference_genomic_align_id) in the genomic_align_array");
|
|
557 }
|
|
558 }
|
|
559
|
|
560 return $self->{'reference_genomic_align'};
|
|
561 }
|
|
562
|
|
563 =head2 reference_genomic_align_id
|
|
564
|
|
565 Arg [1] : integer $reference_genomic_align_id
|
|
566 Example : $genomic_align_block->reference_genomic_align_id(4321);
|
|
567 Description: get/set for attribute reference_genomic_align_id. A value of 0 will set the
|
|
568 reference_genomic_align_id attribute to undef. When looking for genomic
|
|
569 alignments in a given slice or dnafrag, the reference_genomic_align
|
|
570 corresponds to the Bio::EnsEMBL::Compara::GenomicAlign included in the
|
|
571 starting slice or dnafrag. The reference_genomic_align_id is the dbID
|
|
572 corresponding to the reference_genomic_align. All remaining
|
|
573 Bio::EnsEMBL::Compara::GenomicAlign objects included in the
|
|
574 Bio::EnsEMBL::Compara::GenomicAlignBlock are the
|
|
575 non_reference_genomic_aligns.
|
|
576 Synchronises reference_genomic_align and reference_genomic_align_id
|
|
577 attributes.
|
|
578 Returntype : integer
|
|
579 Exceptions : throw if $reference_genomic_align_id id not a postive number
|
|
580 Caller : $genomic_align_block->reference_genomic_align_id(int)
|
|
581 Status : Stable
|
|
582
|
|
583 =cut
|
|
584
|
|
585 sub reference_genomic_align_id {
|
|
586 my ($self, $reference_genomic_align_id) = @_;
|
|
587
|
|
588 if (defined($reference_genomic_align_id)) {
|
|
589 if ($reference_genomic_align_id !~ /^\d+$/) {
|
|
590 throw "[$reference_genomic_align_id] should be a positive number.";
|
|
591 }
|
|
592 $self->{'reference_genomic_align_id'} = ($reference_genomic_align_id or undef);
|
|
593
|
|
594 ## Synchronises reference_genomic_align and reference_genomic_align_id
|
|
595 if (defined($self->{'reference_genomic_align'}) and
|
|
596 defined($self->{'reference_genomic_align'}->dbID) and
|
|
597 ($self->{'reference_genomic_align'}->dbID != ($self->{'reference_genomic_align_id'} or 0))) {
|
|
598 $self->{'reference_genomic_align'} = undef; ## Attribute will be set on request
|
|
599 }
|
|
600
|
|
601 ## Try to get data from other sources...
|
|
602 } elsif (!defined($self->{'reference_genomic_align_id'})) {
|
|
603
|
|
604 ## ...from the reference_genomic_align attribute
|
|
605 if (defined($self->{'reference_genomic_align'}) and
|
|
606 defined($self->{'reference_genomic_align'}->dbID)) {
|
|
607 $self->{'reference_genomic_align_id'} = $self->{'reference_genomic_align'}->dbID;
|
|
608 }
|
|
609 }
|
|
610
|
|
611 return $self->{'reference_genomic_align_id'};
|
|
612 }
|
|
613
|
|
614 =head2 get_all_non_reference_genomic_aligns
|
|
615
|
|
616 Arg [1] : (none)
|
|
617 Example : $genomic_aligns = $genomic_align_block->get_all_non_reference_genomic_aligns();
|
|
618 Description: get the non_reference_genomic_aligns. When looking for genomic alignments in
|
|
619 a given slice or dnafrag, the reference_genomic_align corresponds to the
|
|
620 Bio::EnsEMBL::Compara::GenomicAlign included in the starting slice or
|
|
621 dnafrag. The reference_genomic_align_id is the dbID corresponding to the
|
|
622 reference_genomic_align. All remaining Bio::EnsEMBL::Compara::GenomicAlign
|
|
623 objects included in the Bio::EnsEMBL::Compara::GenomicAlignBlock are the
|
|
624 non_reference_genomic_aligns.
|
|
625 Returntype : a ref. to an array of Bio::EnsEMBL::Compara::GenomicAlign objects
|
|
626 Exceptions : warns if no reference_genomic_align_id has been set and returns a ref.
|
|
627 to an empty array
|
|
628 Exceptions : warns if no genomic_align_array has been set and returns a ref.
|
|
629 to an empty array
|
|
630 Caller : $genomic_align_block->non_reference_genomic_aligns()
|
|
631 Status : Stable
|
|
632
|
|
633 =cut
|
|
634
|
|
635 sub get_all_non_reference_genomic_aligns {
|
|
636 my ($self) = @_;
|
|
637 my $all_non_reference_genomic_aligns = [];
|
|
638
|
|
639 my $reference_genomic_align_id = $self->reference_genomic_align_id;
|
|
640 my $reference_genomic_align = $self->reference_genomic_align;
|
|
641 if (!defined($reference_genomic_align_id) and !defined($reference_genomic_align)) {
|
|
642 warning("Trying to get Bio::EnsEMBL::Compara::GenomicAlign::all_non_reference_genomic_aligns".
|
|
643 " when no reference_genomic_align has been set before");
|
|
644 return $all_non_reference_genomic_aligns;
|
|
645 }
|
|
646 my $genomic_aligns = $self->get_all_GenomicAligns; ## Lazy loading compliant
|
|
647 if (!@$genomic_aligns) {
|
|
648 warning("Trying to get Bio::EnsEMBL::Compara::GenomicAlign::all_non_reference_genomic_aligns".
|
|
649 " when no genomic_align_array can be retrieved");
|
|
650 return $all_non_reference_genomic_aligns;
|
|
651 }
|
|
652
|
|
653 foreach my $this_genomic_align (@$genomic_aligns) {
|
|
654 if (($this_genomic_align->dbID or -1) != ($reference_genomic_align_id or -2) and
|
|
655 $this_genomic_align != $reference_genomic_align) {
|
|
656 push(@$all_non_reference_genomic_aligns, $this_genomic_align);
|
|
657 }
|
|
658 }
|
|
659
|
|
660 return $all_non_reference_genomic_aligns;
|
|
661 }
|
|
662
|
|
663
|
|
664 =head2 genomic_align_array
|
|
665
|
|
666 Arg [1] : array reference containing Bio::EnsEMBL::Compara::GenomicAlign objects
|
|
667 Example : $genomic_aligns = $genomic_align_block->genomic_align_array();
|
|
668 $genomic_align_block->genomic_align_array([$genomic_align1, $genomic_align2]);
|
|
669 Description: get/set for attribute genomic_align_array. If no argument is given, the
|
|
670 genomic_align_array is not defined but both the dbID and the adaptor are,
|
|
671 it tries to fetch the data from the database using the dbID of the
|
|
672 Bio::EnsEMBL::Compara::GenomicAlignBlock object.
|
|
673 You can unset all cached GenomicAlign using 0 as argument. They will be
|
|
674 loaded again from the database if needed.
|
|
675 Returntype : array reference containing Bio::EnsEMBL::Compara::GenomicAlign objects
|
|
676 Exceptions : none
|
|
677 Caller : general
|
|
678 Status : Stable
|
|
679
|
|
680 =cut
|
|
681
|
|
682 sub genomic_align_array {
|
|
683 my ($self, $genomic_align_array) = @_;
|
|
684
|
|
685 if (defined($genomic_align_array)) {
|
|
686 if (!$genomic_align_array) {
|
|
687 ## Clean cache.
|
|
688 $self->{'genomic_align_array'} = undef;
|
|
689 $self->{'reference_genomic_align'} = undef;
|
|
690 return undef;
|
|
691 }
|
|
692 foreach my $genomic_align (@$genomic_align_array) {
|
|
693 throw("[$genomic_align] is not a Bio::EnsEMBL::Compara::GenomicAlign object")
|
|
694 unless (ref $genomic_align and $genomic_align->isa("Bio::EnsEMBL::Compara::GenomicAlign"));
|
|
695 # Create weak circular reference to genomic_align_block from each genomic_align
|
|
696 $genomic_align->genomic_align_block($self);
|
|
697 }
|
|
698 $self->{'genomic_align_array'} = $genomic_align_array;
|
|
699
|
|
700 } elsif (!defined($self->{'genomic_align_array'}) and defined($self->{'adaptor'})
|
|
701 and defined($self->{'dbID'})) {
|
|
702 # Fetch data from DB (allow lazy fetching of genomic_align_block objects)
|
|
703 my $genomic_align_adaptor = $self->adaptor->db->get_GenomicAlignAdaptor();
|
|
704 $self->{'genomic_align_array'} =
|
|
705 $genomic_align_adaptor->fetch_all_by_genomic_align_block_id($self->{'dbID'});
|
|
706 foreach my $this_genomic_align (@{$self->{'genomic_align_array'}}) {
|
|
707 $this_genomic_align->genomic_align_block($self);
|
|
708 }
|
|
709 }
|
|
710
|
|
711 return $self->{'genomic_align_array'};
|
|
712 }
|
|
713
|
|
714
|
|
715 =head2 add_GenomicAlign
|
|
716
|
|
717 Arg [1] : Bio::EnsEMBL::Compara::GenomicAlign $genomic_align
|
|
718 Example : $genomic_align_block->add_GenomicAlign($genomic_align);
|
|
719 Description: adds another Bio::EnsEMBL::Compara::GenomicAlign object to the set of
|
|
720 Bio::EnsEMBL::Compara::GenomicAlign objects in the attribute
|
|
721 genomic_align_array.
|
|
722 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
|
|
723 Exceptions : thrown if wrong argument
|
|
724 Caller : general
|
|
725 Status : Stable
|
|
726
|
|
727 =cut
|
|
728
|
|
729 sub add_GenomicAlign {
|
|
730 my ($self, $genomic_align) = @_;
|
|
731
|
|
732 throw("[$genomic_align] is not a Bio::EnsEMBL::Compara::GenomicAlign object")
|
|
733 unless ($genomic_align and ref($genomic_align) and
|
|
734 $genomic_align->isa("Bio::EnsEMBL::Compara::GenomicAlign"));
|
|
735 # Create weak circular reference to genomic_align_block from each genomic_align
|
|
736 $genomic_align->genomic_align_block($self);
|
|
737 push(@{$self->{'genomic_align_array'}}, $genomic_align);
|
|
738
|
|
739 return $genomic_align;
|
|
740 }
|
|
741
|
|
742
|
|
743 =head2 get_all_GenomicAligns
|
|
744
|
|
745 Arg [1] : none
|
|
746 Example : $genomic_aligns = $genomic_align_block->get_all_GenomicAligns();
|
|
747 Description: returns the set of Bio::EnsEMBL::Compara::GenomicAlign objects in
|
|
748 the attribute genomic_align_array.
|
|
749 Returntype : array reference containing Bio::EnsEMBL::Compara::GenomicAlign objects
|
|
750 Exceptions : none
|
|
751 Caller : general
|
|
752 Status : Stable
|
|
753
|
|
754 =cut
|
|
755
|
|
756 sub get_all_GenomicAligns {
|
|
757 my ($self) = @_;
|
|
758
|
|
759 return ($self->genomic_align_array or []);
|
|
760 }
|
|
761
|
|
762
|
|
763 =head2 score
|
|
764
|
|
765 Arg [1] : double $score
|
|
766 Example : my $score = $genomic_align_block->score();
|
|
767 $genomic_align_block->score(56.2);
|
|
768 Description: get/set for attribute score. If no argument is given, the score
|
|
769 is not defined but both the dbID and the adaptor are, it tries to
|
|
770 fetch and set all the direct attributes from the database using the
|
|
771 dbID of the Bio::EnsEMBL::Compara::GenomicAlignBlock object.
|
|
772 Returntype : double
|
|
773 Exceptions : none
|
|
774 Caller : general
|
|
775 Status : Stable
|
|
776
|
|
777 =cut
|
|
778
|
|
779 sub score {
|
|
780 my ($self, $score) = @_;
|
|
781
|
|
782 if (defined($score)) {
|
|
783 $self->{'score'} = $score;
|
|
784 } elsif (!defined($self->{'score'})) {
|
|
785 # Try to get the ID from other sources...
|
|
786 if (defined($self->{'adaptor'}) and defined($self->dbID)) {
|
|
787 # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object
|
|
788 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
789 }
|
|
790 }
|
|
791
|
|
792 return $self->{'score'};
|
|
793 }
|
|
794
|
|
795
|
|
796 =head2 perc_id
|
|
797
|
|
798 Arg [1] : double $perc_id
|
|
799 Example : my $perc_id = $genomic_align_block->perc_id;
|
|
800 Example : $genomic_align_block->perc_id(95.4);
|
|
801 Description: get/set for attribute perc_id. If no argument is given, the perc_id
|
|
802 is not defined but both the dbID and the adaptor are, it tries to
|
|
803 fetch and set all the direct attributes from the database using the
|
|
804 dbID of the Bio::EnsEMBL::Compara::GenomicAlignBlock object.
|
|
805 Returntype : double
|
|
806 Exceptions : none
|
|
807 Caller : general
|
|
808 Status : Stable
|
|
809
|
|
810 =cut
|
|
811
|
|
812 sub perc_id {
|
|
813 my ($self, $perc_id) = @_;
|
|
814
|
|
815 if (defined($perc_id)) {
|
|
816 $self->{'perc_id'} = $perc_id;
|
|
817 } elsif (!defined($self->{'perc_id'})) {
|
|
818 # Try to get the ID from other sources...
|
|
819 if (defined($self->{'adaptor'}) and defined($self->dbID)) {
|
|
820 # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object
|
|
821 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
822 }
|
|
823 }
|
|
824
|
|
825 return $self->{'perc_id'};
|
|
826 }
|
|
827
|
|
828
|
|
829 =head2 length
|
|
830
|
|
831 Arg [1] : integer $length
|
|
832 Example : my $length = $genomic_align_block->length;
|
|
833 Example : $genomic_align_block->length(562);
|
|
834 Description: get/set for attribute length. If no argument is given, the length
|
|
835 is not defined but both the dbID and the adaptor are, it tries to
|
|
836 fetch and set all the direct attributes from the database using the
|
|
837 dbID of the Bio::EnsEMBL::Compara::GenomicAlignBlock object.
|
|
838 Returntype : integer
|
|
839 Exceptions : none
|
|
840 Caller : general
|
|
841 Status : Stable
|
|
842
|
|
843 =cut
|
|
844
|
|
845 sub length {
|
|
846 my ($self, $length) = @_;
|
|
847
|
|
848 if (defined($length)) {
|
|
849 $self->{'length'} = $length;
|
|
850 } elsif (!defined($self->{'length'})) {
|
|
851 # Try to get the ID from other sources...
|
|
852 if (defined($self->{'adaptor'}) and defined($self->dbID)) {
|
|
853 # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object
|
|
854 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
855 } elsif (@{$self->get_all_GenomicAligns} and $self->get_all_GenomicAligns->[0]->aligned_sequence("+FAKE_SEQ")) {
|
|
856 $self->{'length'} = CORE::length($self->get_all_GenomicAligns->[0]->aligned_sequence("+FAKE_SEQ"));
|
|
857 }
|
|
858 }
|
|
859
|
|
860 return $self->{'length'};
|
|
861 }
|
|
862
|
|
863 =head2 group_id
|
|
864
|
|
865 Arg [1] : integer $group_id
|
|
866 Example : my $group_id = $genomic_align_block->group_id;
|
|
867 Example : $genomic_align_block->group_id(1234);
|
|
868 Description: get/set for attribute group_id.
|
|
869 Returntype : integer
|
|
870 Exceptions : none
|
|
871 Caller : general
|
|
872 Status : At risk
|
|
873
|
|
874 =cut
|
|
875
|
|
876 sub group_id {
|
|
877 my ($self, $group_id) = @_;
|
|
878
|
|
879 if (defined($group_id)) {
|
|
880 $self->{'group_id'} = ($group_id);
|
|
881 } elsif (!defined($self->{'group_id'})) {
|
|
882 # Try to get the ID from other sources...
|
|
883 if (defined($self->{'adaptor'}) and defined($self->dbID)) {
|
|
884 # ...from the database, using the dbID of the Bio::Ensembl::Compara::GenomicAlignBlock object
|
|
885 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
886 }
|
|
887 }
|
|
888 return $self->{'group_id'};
|
|
889 }
|
|
890
|
|
891 =head2 level_id
|
|
892
|
|
893 Arg [1] : int $level_id
|
|
894 Example : $level_id = $genomic_align->level_id;
|
|
895 Example : $genomic_align->level_id(1);
|
|
896 Description: get/set for attribute level_id. If no argument is given, the level_id
|
|
897 is not defined but both the dbID and the adaptor are, it tries to
|
|
898 fetch and set all the direct attributes from the database using the
|
|
899 dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
900 Returntype : int
|
|
901 Exceptions : none
|
|
902 Warning : warns if getting data from other sources fails.
|
|
903 Caller : object->methodname
|
|
904 Status : Stable
|
|
905
|
|
906 =cut
|
|
907
|
|
908 sub level_id {
|
|
909 my ($self, $level_id) = @_;
|
|
910
|
|
911 if (defined($level_id)) {
|
|
912 $self->{'level_id'} = $level_id;
|
|
913
|
|
914 } elsif (!defined($self->{'level_id'})) {
|
|
915 if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
916 # Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlignBlock object
|
|
917 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
918 } else {
|
|
919 # warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlignBlock->level_id".
|
|
920 # " You either have to specify more information (see perldoc for".
|
|
921 # " Bio::EnsEMBL::Compara::GenomicAlignBlock) or to set it up directly");
|
|
922 }
|
|
923 }
|
|
924
|
|
925 return $self->{'level_id'};
|
|
926 }
|
|
927
|
|
928 =head2 requesting_slice (DEPRECATED)
|
|
929
|
|
930 DEPRECATED! Use Bio::EnsEMBL::Compara::GenomicAlignBlock->reference_slice() method instead
|
|
931
|
|
932 Arg [1] : Bio::EnsEMBL::Slice $reference_slice
|
|
933 Example : my $reference_slice = $genomic_align_block->requesting_slice;
|
|
934 Example : $genomic_align_block->requesting_slice($reference_slice);
|
|
935 Description: get/set for attribute reference_slice.
|
|
936 Returntype : Bio::EnsEMBL::Slice object
|
|
937 Exceptions : throw if $reference_slice is not a Bio::EnsEMBL::Slice
|
|
938 Caller : general
|
|
939
|
|
940 =cut
|
|
941
|
|
942 sub requesting_slice {
|
|
943 my ($self) = shift;
|
|
944 deprecate("Use Bio::EnsEMBL::Compara::GenomicAlignBlock->reference_slice() method instead");
|
|
945 return $self->reference_slice(@_);
|
|
946 }
|
|
947
|
|
948 =head2 alignment_strings
|
|
949
|
|
950 Arg [1] : none
|
|
951 Example : $genomic_align_block->alignment_strings
|
|
952 Description: Returns the alignment string of all the sequences in the
|
|
953 alignment
|
|
954 Returntype : array reference containing several strings
|
|
955 Exceptions : none
|
|
956 Caller : general
|
|
957 Status : Stable
|
|
958
|
|
959 =cut
|
|
960
|
|
961 sub alignment_strings {
|
|
962 my ($self) = @_;
|
|
963 my $alignment_strings = [];
|
|
964
|
|
965 foreach my $genomic_align (@{$self->get_all_GenomicAligns}) {
|
|
966 push(@$alignment_strings, $genomic_align->aligned_sequence);
|
|
967 }
|
|
968
|
|
969 return $alignment_strings;
|
|
970 }
|
|
971
|
|
972
|
|
973 =head2 get_SimpleAlign
|
|
974
|
|
975 Arg [1] : list of string $flags
|
|
976 "translated" = by default, the sequence alignment will be on nucleotide. With "translated" flag
|
|
977 the aligned sequences are translated.
|
|
978 "uc" = by default aligned sequences are given in lower cases. With "uc" flag, the aligned
|
|
979 sequences are given in upper cases.
|
|
980 "display_id" = by default the name of each sequence in the alignment is $dnafrag->name. With
|
|
981 "dispaly_id" flag the name of each sequence is defined by the
|
|
982 Bio::EnsEMBL::Compara::GenomicAlign display_id method.
|
|
983 Example : $daf->get_SimpleAlign or
|
|
984 $daf->get_SimpleAlign("translated") or
|
|
985 $daf->get_SimpleAlign("translated","uc")
|
|
986 Description: Allows to rebuild the alignment string of all the genomic_align objects within
|
|
987 this genomic_align_block using the cigar_line information
|
|
988 and access to the core database slice objects
|
|
989 Returntype : a Bio::SimpleAlign object
|
|
990 Exceptions :
|
|
991 Caller : general
|
|
992 Status : Stable
|
|
993
|
|
994 =cut
|
|
995
|
|
996 sub get_SimpleAlign {
|
|
997 my ( $self, @flags ) = @_;
|
|
998
|
|
999 # setting the flags
|
|
1000 my $uc = 0;
|
|
1001 my $translated = 0;
|
|
1002 my $display_id = 0;
|
|
1003
|
|
1004 for my $flag ( @flags ) {
|
|
1005 $uc = 1 if ($flag =~ /^uc$/i);
|
|
1006 $translated = 1 if ($flag =~ /^translated$/i);
|
|
1007 $display_id = 1 if ($flag =~ /^display_id$/i);
|
|
1008 }
|
|
1009
|
|
1010 my $sa = Bio::SimpleAlign->new();
|
|
1011
|
|
1012 #Hack to try to work with both bioperl 0.7 and 1.2:
|
|
1013 #Check to see if the method is called 'addSeq' or 'add_seq'
|
|
1014 my $bio07 = 0;
|
|
1015 if(!$sa->can('add_seq')) {
|
|
1016 $bio07 = 1;
|
|
1017 }
|
|
1018
|
|
1019 my $all_genomic_aligns;
|
|
1020 if ($self->reference_genomic_align) {
|
|
1021 $all_genomic_aligns = [$self->reference_genomic_align,@{$self->get_all_non_reference_genomic_aligns}];
|
|
1022 } else {
|
|
1023 $all_genomic_aligns = $self->get_all_GenomicAligns();
|
|
1024 }
|
|
1025
|
|
1026 foreach my $genomic_align (@$all_genomic_aligns) {
|
|
1027 my $alignSeq = $genomic_align->aligned_sequence;
|
|
1028 next if($alignSeq=~/^[\.\-]+$/);
|
|
1029
|
|
1030 my $loc_seq = Bio::LocatableSeq->new(-SEQ => $uc ? uc $alignSeq : lc $alignSeq,
|
|
1031 -START => $genomic_align->dnafrag_start,
|
|
1032 -END => $genomic_align->dnafrag_end,
|
|
1033 -ID => $display_id ? $genomic_align->display_id : ($genomic_align->dnafrag->genome_db->name . "/" . $genomic_align->dnafrag->name),
|
|
1034 -STRAND => $genomic_align->dnafrag_strand);
|
|
1035
|
|
1036 $loc_seq->seq($uc ? uc $loc_seq->translate->seq
|
|
1037 : lc $loc_seq->translate->seq) if ($translated);
|
|
1038
|
|
1039 if($bio07) { $sa->addSeq($loc_seq); }
|
|
1040 else { $sa->add_seq($loc_seq); }
|
|
1041
|
|
1042 }
|
|
1043 return $sa;
|
|
1044 }
|
|
1045
|
|
1046
|
|
1047 =head2 _create_from_a_list_of_ungapped_genomic_align_blocks (testing)
|
|
1048
|
|
1049 Args : listref of ungapped Bio::EnsEMBL::Compara::GenomicAlignBlocks
|
|
1050 Example : $new_genomic_align_block =
|
|
1051 $self->_create_from_a_list_of_ungapped_genomic_align_blocks(
|
|
1052 $ungapped_genomic_align_blocks
|
|
1053 );
|
|
1054 Description: Takes a list of ungapped Bio::EnsEMBL::Compara::GenomicAlignBlock
|
|
1055 objects and creates a new Bio::EnsEMBL::Compara::GenomicAlignBlock
|
|
1056 Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object
|
|
1057 Exceptions : lots...
|
|
1058 Caller : new()
|
|
1059 Status : At risk
|
|
1060
|
|
1061 =cut
|
|
1062
|
|
1063 sub _create_from_a_list_of_ungapped_genomic_align_blocks {
|
|
1064 my ($self, $ungapped_genomic_align_blocks) = @_;
|
|
1065
|
|
1066 ## Set adaptor
|
|
1067 my $adaptor = undef;
|
|
1068 foreach my $genomic_align_block (@$ungapped_genomic_align_blocks) {
|
|
1069 if ($genomic_align_block->adaptor) {
|
|
1070 $self->adaptor($adaptor);
|
|
1071 last;
|
|
1072 }
|
|
1073 }
|
|
1074
|
|
1075 ## Set method_link_species_set
|
|
1076 my $method_link_species_set = undef;
|
|
1077 foreach my $genomic_align_block (@$ungapped_genomic_align_blocks) {
|
|
1078 if ($genomic_align_block->method_link_species_set) {
|
|
1079 if ($method_link_species_set) {
|
|
1080 if ($method_link_species_set->dbID != $genomic_align_block->method_link_species_set->dbID) {
|
|
1081 warning("Creating a GenomicAlignBlock from a list of ungapped GenomicAlignBlock with".
|
|
1082 " different method_link_species_set is not supported");
|
|
1083 return undef;
|
|
1084 }
|
|
1085 } else {
|
|
1086 $method_link_species_set = $genomic_align_block->method_link_species_set;
|
|
1087 }
|
|
1088 }
|
|
1089 }
|
|
1090 $self->method_link_species_set($method_link_species_set);
|
|
1091
|
|
1092 ## Set method_link_species_set_id
|
|
1093 my $method_link_species_set_id = undef;
|
|
1094 foreach my $genomic_align_block (@$ungapped_genomic_align_blocks) {
|
|
1095 if ($genomic_align_block->method_link_species_set_id) {
|
|
1096 if ($method_link_species_set_id) {
|
|
1097 if ($method_link_species_set_id != $genomic_align_block->method_link_species_set_id) {
|
|
1098 warning("Creating a GenomicAlignBlock from a list of ungapped GenomicAlignBlock with".
|
|
1099 " different method_link_species_set_id is not supported");
|
|
1100 return undef;
|
|
1101 }
|
|
1102 } else {
|
|
1103 $method_link_species_set_id = $genomic_align_block->method_link_species_set_id;
|
|
1104 }
|
|
1105 }
|
|
1106 }
|
|
1107 $self->method_link_species_set_id($method_link_species_set_id);
|
|
1108
|
|
1109 my $genomic_aligns;
|
|
1110 ## Check blocks and create new genomic_aligns
|
|
1111 foreach my $genomic_align_block (@$ungapped_genomic_align_blocks) {
|
|
1112 foreach my $this_genomic_align (@{$genomic_align_block->get_all_GenomicAligns}) {
|
|
1113 my $dnafrag_id = $this_genomic_align->dnafrag_id;
|
|
1114 if (!defined($genomic_aligns->{$dnafrag_id})) {
|
|
1115 $genomic_aligns->{$dnafrag_id} = new Bio::EnsEMBL::Compara::GenomicAlign(
|
|
1116 -adaptor => $this_genomic_align->adaptor,
|
|
1117 -method_link_species_set => $method_link_species_set,
|
|
1118 -method_link_species_set_id => $method_link_species_set_id,
|
|
1119 -dnafrag => $this_genomic_align->dnafrag,
|
|
1120 -dnafrag_start => $this_genomic_align->dnafrag_start,
|
|
1121 -dnafrag_end => $this_genomic_align->dnafrag_end,
|
|
1122 -dnafrag_strand => $this_genomic_align->dnafrag_strand,
|
|
1123 );
|
|
1124 } else {
|
|
1125 ## Check strand
|
|
1126 if ($this_genomic_align->dnafrag_strand < $genomic_aligns->{$dnafrag_id}->dnafrag_strand) {
|
|
1127 warning("The list of ungapped GenomicAlignBlock is inconsistent in strand");
|
|
1128 return undef;
|
|
1129 }
|
|
1130
|
|
1131 ## Check order and lengthen genomic_align
|
|
1132 if ($this_genomic_align->dnafrag_strand == -1) {
|
|
1133 if ($this_genomic_align->dnafrag_end >= $genomic_aligns->{$dnafrag_id}->dnafrag_start) {
|
|
1134 warning("The list of ungapped GenomicAlignBlock must be previously sorted");
|
|
1135 return undef;
|
|
1136 }
|
|
1137 $genomic_aligns->{$dnafrag_id}->dnafrag_start($this_genomic_align->dnafrag_start);
|
|
1138 } else {
|
|
1139 if ($this_genomic_align->dnafrag_start <= $genomic_aligns->{$dnafrag_id}->dnafrag_end) {
|
|
1140 warning("The list of ungapped GenomicAlignBlock must be previously sorted");
|
|
1141 return undef;
|
|
1142 }
|
|
1143 $genomic_aligns->{$dnafrag_id}->dnafrag_end($this_genomic_align->dnafrag_end);
|
|
1144 }
|
|
1145 }
|
|
1146 }
|
|
1147 }
|
|
1148
|
|
1149 ## Create cigar lines
|
|
1150 my $cigar_lines;
|
|
1151 for (my $i=0; $i<@$ungapped_genomic_align_blocks; $i++) {
|
|
1152 my $genomic_align_block = $ungapped_genomic_align_blocks->[$i];
|
|
1153 my $block_length = 0;
|
|
1154 ## Calculate block length
|
|
1155 foreach my $this_genomic_align (@{$genomic_align_block->get_all_GenomicAligns}) {
|
|
1156 if ($block_length) {
|
|
1157 if ($block_length != CORE::length($this_genomic_align->aligned_sequence)) {
|
|
1158 warning("The list of ungapped GenomicAlignBlock is inconsistent in gaps");
|
|
1159 return undef;
|
|
1160 }
|
|
1161 } else {
|
|
1162 $block_length = CORE::length($this_genomic_align->aligned_sequence);
|
|
1163 }
|
|
1164 }
|
|
1165
|
|
1166 next if ($block_length == 0); # Skip 0-length blocks (shouldn't happen)
|
|
1167 $block_length = "" if ($block_length == 1); # avoid a "1" in cigar_line
|
|
1168
|
|
1169 ## Fix cigar line according to block length
|
|
1170 while (my ($id, $genomic_align) = each %{$genomic_aligns}) {
|
|
1171 my $is_included_in_this_block = 0;
|
|
1172 foreach my $this_genomic_align (@{$genomic_align_block->get_all_GenomicAligns}) {
|
|
1173 if ($this_genomic_align->dnafrag_id == $id) {
|
|
1174 $is_included_in_this_block = 1;
|
|
1175 $cigar_lines->{$id} .= $this_genomic_align->cigar_line;
|
|
1176 last;
|
|
1177 }
|
|
1178 }
|
|
1179 if (!$is_included_in_this_block) {
|
|
1180 $cigar_lines->{$id} .= $block_length."D";
|
|
1181 }
|
|
1182 }
|
|
1183
|
|
1184 ## Add extra gaps between genomic_align_blocks
|
|
1185 if (defined($ungapped_genomic_align_blocks->[$i+1])) {
|
|
1186 foreach my $genomic_align1 (@{$genomic_align_block->get_all_GenomicAligns}) {
|
|
1187 foreach my $genomic_align2 (@{$ungapped_genomic_align_blocks->[$i+1]->get_all_GenomicAligns}) {
|
|
1188 next if ($genomic_align1->dnafrag_id != $genomic_align2->dnafrag_id);
|
|
1189 ## $gap is the piece of sequence of this dnafrag between this block and the next one
|
|
1190 my $gap;
|
|
1191 if ($genomic_align1->dnafrag_strand == 1) {
|
|
1192 $gap = $genomic_align2->dnafrag_start - $genomic_align1->dnafrag_end - 1;
|
|
1193 } else {
|
|
1194 $gap = $genomic_align1->dnafrag_start - $genomic_align2->dnafrag_end - 1;
|
|
1195 }
|
|
1196 if ($gap) {
|
|
1197 $gap = "" if ($gap == 1);
|
|
1198 foreach my $genomic_align3 (@{$genomic_align_block->get_all_GenomicAligns}) {
|
|
1199 if ($genomic_align1->dnafrag_id == $genomic_align3->dnafrag_id) {
|
|
1200 ## Add (mis)matches for this sequence
|
|
1201 $cigar_lines->{$genomic_align3->dnafrag_id} .= $gap."M";
|
|
1202 } else {
|
|
1203 ## Add gaps for others
|
|
1204 $cigar_lines->{$genomic_align3->dnafrag_id} .= $gap."D";
|
|
1205 }
|
|
1206 }
|
|
1207 }
|
|
1208 }
|
|
1209 }
|
|
1210 }
|
|
1211
|
|
1212 }
|
|
1213
|
|
1214 while (my ($id, $genomic_align) = each %{$genomic_aligns}) {
|
|
1215 $genomic_align->cigar_line($cigar_lines->{$id});
|
|
1216 $self->add_GenomicAlign($genomic_align);
|
|
1217 }
|
|
1218
|
|
1219 return $self;
|
|
1220 }
|
|
1221
|
|
1222
|
|
1223 =head2 get_all_ungapped_GenomicAlignBlocks
|
|
1224
|
|
1225 Args : (optional) listref $genome_dbs
|
|
1226 Example : my $ungapped_genomic_align_blocks =
|
|
1227 $self->get_all_ungapped_GenomicAlignBlocks();
|
|
1228 Example : my $ungapped_genomic_align_blocks =
|
|
1229 $self->get_all_ungapped_GenomicAlignBlocks([$human_genome_db, $mouse_genome_db]);
|
|
1230 Description: split the GenomicAlignBlock object into a set of ungapped
|
|
1231 alignments. If a list of genome_dbs is provided, only those
|
|
1232 sequences will be taken into account. This can be used to extract
|
|
1233 ungapped pairwise alignments from multiple alignments.
|
|
1234 Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlignBlocks objects
|
|
1235 Exceptions : none
|
|
1236 Caller : general
|
|
1237 Status : At risk
|
|
1238
|
|
1239 =cut
|
|
1240
|
|
1241 sub get_all_ungapped_GenomicAlignBlocks {
|
|
1242 my ($self, $genome_dbs) = @_;
|
|
1243 my $ungapped_genomic_align_blocks = [];
|
|
1244
|
|
1245 my $genomic_aligns = $self->get_all_GenomicAligns;
|
|
1246 if ($genome_dbs and @$genome_dbs) {
|
|
1247 my $these_genomic_aligns = [];
|
|
1248 foreach my $this_genomic_align (@$genomic_aligns) {
|
|
1249 if (grep {$this_genomic_align->genome_db->name eq $_->name} @$genome_dbs) {
|
|
1250 push(@$these_genomic_aligns, $this_genomic_align);
|
|
1251 }
|
|
1252 }
|
|
1253 if (@$these_genomic_aligns > 1) {
|
|
1254 $genomic_aligns = $these_genomic_aligns;
|
|
1255 } else {
|
|
1256 return [];
|
|
1257 }
|
|
1258 }
|
|
1259 my $aln_length = CORE::length($genomic_aligns->[0]->aligned_sequence("+FAKE_SEQ"));
|
|
1260 # foreach my $this_genomic_align (@$genomic_aligns) {
|
|
1261 # print STDERR join(" - ", $this_genomic_align->dnafrag_start, $this_genomic_align->dnafrag_end,
|
|
1262 # $this_genomic_align->dnafrag_strand, $this_genomic_align->aligned_sequence("+FAKE_SEQ")), "\n";
|
|
1263 # }
|
|
1264
|
|
1265 my $aln_pos = 0;
|
|
1266 my $gap;
|
|
1267 my $end_block_pos;
|
|
1268 do {
|
|
1269 $end_block_pos = undef;
|
|
1270 my $these_genomic_aligns_with_no_gaps;
|
|
1271
|
|
1272 ## Get the (next) first gap from all the aligned sequences (sets: $gap_pos, $gap and $genomic_align_block_id)
|
|
1273 foreach my $this_genomic_align (@$genomic_aligns) {
|
|
1274 my $this_end_block_pos = index($this_genomic_align->aligned_sequence("+FAKE_SEQ"), "-", $aln_pos);
|
|
1275 if ($this_end_block_pos == $aln_pos) {
|
|
1276 ## try to find the end of the gaps
|
|
1277 my $gap_string = substr($this_genomic_align->aligned_sequence("+FAKE_SEQ"), $aln_pos);
|
|
1278 ($gap) = $gap_string =~ /^(\-+)/;
|
|
1279 my $gap_length = CORE::length($gap);
|
|
1280 $this_end_block_pos = $aln_pos+$gap_length;
|
|
1281 } else {
|
|
1282 $these_genomic_aligns_with_no_gaps->{$this_genomic_align} = $this_genomic_align;
|
|
1283 }
|
|
1284 $this_end_block_pos = CORE::length($this_genomic_align->aligned_sequence("+FAKE_SEQ")) if ($this_end_block_pos < 0); # no more gaps have been found in this sequence
|
|
1285
|
|
1286
|
|
1287 if (!defined($end_block_pos) or $this_end_block_pos < $end_block_pos) {
|
|
1288 $end_block_pos = $this_end_block_pos;
|
|
1289 }
|
|
1290 }
|
|
1291
|
|
1292 if (scalar(keys(%$these_genomic_aligns_with_no_gaps)) > 1) {
|
|
1293 my $new_genomic_aligns;
|
|
1294 my $reference_genomic_align;
|
|
1295 foreach my $this_genomic_align (values %$these_genomic_aligns_with_no_gaps) {
|
|
1296 my $previous_seq = substr($this_genomic_align->aligned_sequence("+FAKE_SEQ"), 0, $aln_pos );
|
|
1297 $previous_seq =~ s/\-//g;
|
|
1298 my $dnafrag_start;
|
|
1299 my $dnafrag_end;
|
|
1300 my $cigar_line;
|
|
1301 my $cigar_length = ($end_block_pos - $aln_pos);
|
|
1302 $cigar_length = "" if ($cigar_length == 1);
|
|
1303 if ($this_genomic_align->dnafrag_strand == 1) {
|
|
1304 $dnafrag_start = $this_genomic_align->dnafrag_start + CORE::length($previous_seq);
|
|
1305 $dnafrag_end = $dnafrag_start + $end_block_pos - $aln_pos - 1;
|
|
1306 $cigar_line = $cigar_length."M";
|
|
1307 } else {
|
|
1308 $dnafrag_end = $this_genomic_align->dnafrag_end - CORE::length($previous_seq);
|
|
1309 $dnafrag_start = $dnafrag_end - $end_block_pos + $aln_pos + 1;
|
|
1310 $cigar_line = $cigar_length."M";
|
|
1311 }
|
|
1312 my $new_genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign(
|
|
1313 -adaptor => $this_genomic_align->adaptor,
|
|
1314 -method_link_species_set => $this_genomic_align->method_link_species_set,
|
|
1315 -dnafrag => $this_genomic_align->dnafrag,
|
|
1316 -dnafrag_start => $dnafrag_start,
|
|
1317 -dnafrag_end => $dnafrag_end,
|
|
1318 -dnafrag_strand => $this_genomic_align->dnafrag_strand,
|
|
1319 -cigar_line => $cigar_line,
|
|
1320 );
|
|
1321 $reference_genomic_align = $new_genomic_align
|
|
1322 if (defined($self->reference_genomic_align) and
|
|
1323 $self->reference_genomic_align == $this_genomic_align);
|
|
1324 push(@$new_genomic_aligns, $new_genomic_align);
|
|
1325 }
|
|
1326 ## Create a new GenomicAlignBlock
|
|
1327 my $new_genomic_align_block = new Bio::EnsEMBL::Compara::GenomicAlignBlock(
|
|
1328 -method_link_species_set => $self->method_link_species_set,
|
|
1329 -length => $end_block_pos - $aln_pos,
|
|
1330 -genomic_align_array => $new_genomic_aligns,
|
|
1331 );
|
|
1332 $new_genomic_align_block->reference_genomic_align($reference_genomic_align) if (defined($reference_genomic_align));
|
|
1333 push(@$ungapped_genomic_align_blocks, $new_genomic_align_block);
|
|
1334 }
|
|
1335 $aln_pos = $end_block_pos;
|
|
1336
|
|
1337 } while ($aln_pos < $aln_length); # exit loop if no gap has been found
|
|
1338
|
|
1339 return $ungapped_genomic_align_blocks;
|
|
1340 }
|
|
1341
|
|
1342
|
|
1343 =head2 reverse_complement
|
|
1344
|
|
1345 Args : none
|
|
1346 Example : none
|
|
1347 Description: reverse complement the ,
|
|
1348 modifying dnafrag_strand and cigar_line of each GenomicAlign in consequence
|
|
1349 Returntype : none
|
|
1350 Exceptions : none
|
|
1351 Caller : general
|
|
1352 Status : Stable
|
|
1353
|
|
1354 =cut
|
|
1355
|
|
1356 sub reverse_complement {
|
|
1357 my ($self) = @_;
|
|
1358
|
|
1359 if (defined($self->{_original_strand})) {
|
|
1360 $self->{_original_strand} = 1 - $self->{_original_strand};
|
|
1361 } else {
|
|
1362 $self->{_original_strand} = 0;
|
|
1363 }
|
|
1364
|
|
1365 my $gas = $self->get_all_GenomicAligns;
|
|
1366 foreach my $ga (@{$gas}) {
|
|
1367 $ga->reverse_complement;
|
|
1368 }
|
|
1369 }
|
|
1370
|
|
1371 =head2 restrict_between_alignment_positions
|
|
1372
|
|
1373 Arg[1] : [optional] int $start, refers to the start of the alignment
|
|
1374 Arg[2] : [optional] int $end, refers to the start of the alignment
|
|
1375 Arg[3] : [optional] boolean $skip_empty_GenomicAligns
|
|
1376 Example : none
|
|
1377 Description: restrict this GenomicAlignBlock. It returns a new object unless no
|
|
1378 restriction is needed. In that case, it returns the original unchanged
|
|
1379 object.
|
|
1380 This method uses coordinates relative to the alignment itself.
|
|
1381 For instance if you have an alignment like:
|
|
1382 1 1 2 2 3
|
|
1383 1 5 0 5 0 5 0
|
|
1384 AAC--CTTGTGGTA-CTACTT-----ACTTT
|
|
1385 AACCCCTT-TGGTATCTACTTACCTAACTTT
|
|
1386 and you restrict it between 5 and 25, you will get back a
|
|
1387 object containing the following alignment:
|
|
1388 1 1
|
|
1389 1 5 0 5
|
|
1390 CTTGTGGTA-CTACTT----
|
|
1391 CTT-TGGTATCTACTTACCT
|
|
1392
|
|
1393 See restrict_between_reference_positions() elsewhere in this document
|
|
1394 for an alternative method using absolute genomic coordinates.
|
|
1395
|
|
1396 NB: This method works only for GenomicAlignBlock which have been
|
|
1397 fetched from the DB as it is adjusting the dnafrag coordinates
|
|
1398 and the cigar_line only and not the actual sequences stored in the
|
|
1399 object if any. If you want to restrict an object with no coordinates
|
|
1400 a simple substr() will do!
|
|
1401
|
|
1402 Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object
|
|
1403 Exceptions : none
|
|
1404 Caller : general
|
|
1405 Status : At risk
|
|
1406
|
|
1407 =cut
|
|
1408
|
|
1409 sub restrict_between_alignment_positions {
|
|
1410 my ($self, $start, $end, $skip_empty_GenomicAligns) = @_;
|
|
1411 my $genomic_align_block;
|
|
1412 my $new_reference_genomic_align;
|
|
1413 my $new_genomic_aligns;
|
|
1414
|
|
1415 $start = 1 if (!defined($start) or $start < 1);
|
|
1416 $end = $self->length if (!defined($end) or $end > $self->length);
|
|
1417
|
|
1418 my $number_of_columns_to_trim_from_the_start = $start - 1;
|
|
1419 my $number_of_columns_to_trim_from_the_end = $self->length - $end;
|
|
1420
|
|
1421 ## Skip if no restriction is needed. Return original object! We are still going on with the
|
|
1422 ## restriction when either excess_at_the_start or excess_at_the_end are 0 as a (multiple)
|
|
1423 ## alignment may start or end with gaps in the reference species. In that case, we want to
|
|
1424 ## trim these gaps from the alignment as they fall just outside of the region of interest
|
|
1425 return $self if ($number_of_columns_to_trim_from_the_start <= 0
|
|
1426 and $number_of_columns_to_trim_from_the_end <= 0);
|
|
1427
|
|
1428 my $final_alignment_length = $end - $start + 1;
|
|
1429
|
|
1430 ## Create a new Bio::EnsEMBL::Compara::GenomicAlignBlock object with restricted GenomicAligns
|
|
1431 my $length = $self->{length};
|
|
1432 foreach my $this_genomic_align (@{$self->get_all_GenomicAligns}) {
|
|
1433 my $new_genomic_align = $this_genomic_align->restrict($start, $end, $length);
|
|
1434 if ($self->reference_genomic_align and $this_genomic_align == $self->reference_genomic_align) {
|
|
1435 $new_reference_genomic_align = $new_genomic_align;
|
|
1436 }
|
|
1437 push(@$new_genomic_aligns, $new_genomic_align);
|
|
1438 }
|
|
1439 $genomic_align_block = new Bio::EnsEMBL::Compara::GenomicAlignBlock(
|
|
1440 -method_link_species_set => $self->method_link_species_set,
|
|
1441 -genomic_align_array => $new_genomic_aligns,
|
|
1442 -group_id => $self->group_id,
|
|
1443 -level_id => $self->level_id,
|
|
1444 );
|
|
1445 $genomic_align_block->{original_dbID} = ($self->dbID or $self->{original_dbID});
|
|
1446 $genomic_align_block->{_original_strand} = $self->{_original_strand};
|
|
1447 if ($new_reference_genomic_align) {
|
|
1448 $genomic_align_block->reference_genomic_align($new_reference_genomic_align);
|
|
1449 }
|
|
1450 $genomic_align_block->reference_slice($self->reference_slice);
|
|
1451
|
|
1452 # The restriction might result in empty GenomicAligns. If the
|
|
1453 # skip_empty_GenomicAligns flag is set, remove them from the block.
|
|
1454 if ($skip_empty_GenomicAligns) {
|
|
1455 my $reference_genomic_align = $genomic_align_block->reference_genomic_align();
|
|
1456 my $genomic_align_array = $genomic_align_block->genomic_align_array;
|
|
1457 for (my $i=0; $i<@$genomic_align_array; $i++) {
|
|
1458 if ($genomic_align_array->[$i]->dnafrag_start > $genomic_align_array->[$i]->dnafrag_end) {
|
|
1459 splice(@$genomic_align_array, $i, 1);
|
|
1460 $i--;
|
|
1461 }
|
|
1462 }
|
|
1463 $genomic_align_block->reference_genomic_align($reference_genomic_align) if ($reference_genomic_align);
|
|
1464 }
|
|
1465 $genomic_align_block->length($final_alignment_length);
|
|
1466
|
|
1467 return $genomic_align_block;
|
|
1468 }
|
|
1469
|
|
1470 =head2 _print
|
|
1471
|
|
1472 Arg [1] : none
|
|
1473 Example : $genomic_align->_print
|
|
1474 Description: print attributes of the object to the STDOUT. Used for debuging purposes.
|
|
1475 Returntype : none
|
|
1476 Exceptions :
|
|
1477 Caller : object::methodname
|
|
1478 Status : At risk
|
|
1479
|
|
1480 =cut
|
|
1481
|
|
1482 sub _print {
|
|
1483 my ($self, $FILEH) = @_;
|
|
1484
|
|
1485 $FILEH ||= \*STDOUT;
|
|
1486 print $FILEH
|
|
1487 "Bio::EnsEMBL::Compara::GenomicAlignBlock object ($self)
|
|
1488 dbID = ", ($self->dbID or "-undef-"), "
|
|
1489 adaptor = ", ($self->adaptor or "-undef-"), "
|
|
1490 method_link_species_set = ", ($self->method_link_species_set or "-undef-"), "
|
|
1491 method_link_species_set_id = ", ($self->method_link_species_set_id or "-undef-"), "
|
|
1492 genomic_aligns = ", (scalar(@{$self->genomic_align_array}) or "-undef-"), "
|
|
1493 score = ", ($self->score or "-undef-"), "
|
|
1494 length = ", ($self->length or "-undef-"), "
|
|
1495 alignments: \n";
|
|
1496 foreach my $this_genomic_align (@{$self->genomic_align_array()}) {
|
|
1497 my $slice = $this_genomic_align->get_Slice;
|
|
1498 if ($self->reference_genomic_align and $self->reference_genomic_align == $this_genomic_align) {
|
|
1499 print $FILEH " * ", $this_genomic_align->genome_db->name, " ",
|
|
1500 ($slice?$slice->name:"--error--"), "\n";
|
|
1501 } else {
|
|
1502 print $FILEH " - ", $this_genomic_align->genome_db->name, " ",
|
|
1503 ($slice?$slice->name:"--error--"), "\n";
|
|
1504 }
|
|
1505 }
|
|
1506
|
|
1507 }
|
|
1508
|
|
1509
|
|
1510 #####################################################################
|
|
1511 #####################################################################
|
|
1512
|
|
1513 =head1 METHODS FOR BACKWARDS COMPATIBILITY
|
|
1514
|
|
1515 Consensus and Query DnaFrag are no longer used. DO NOT USE THOSE METHODS IN NEW SCRIPTS, THEY WILL DISSAPEAR!!
|
|
1516
|
|
1517 For backwards compatibility, consensus_genomic_align correponds to the lower genome_db_id by convention.
|
|
1518 This convention works for pairwise alignment only! Trying to use the old API methods for multiple
|
|
1519 alignments will throw an exception.
|
|
1520
|
|
1521 =cut
|
|
1522
|
|
1523 #####################################################################
|
|
1524 #####################################################################
|
|
1525
|
|
1526
|
|
1527 =head2 get_old_consensus_genomic_align [FOR BACKWARDS COMPATIBILITY ONLY]
|
|
1528
|
|
1529 Arg [1] : none
|
|
1530 Example : $old_consensus_genomic_aligns = $genomic_align_group->get_old_consensus_genomic_align();
|
|
1531 Description: get the Bio::EnsEMBL::Compara::GenomicAlign object following the convention for backwards
|
|
1532 compatibility
|
|
1533 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
|
|
1534 Exceptions :
|
|
1535 Caller : general
|
|
1536
|
|
1537 =cut
|
|
1538
|
|
1539 sub get_old_consensus_genomic_align {
|
|
1540 my ($self) = @_;
|
|
1541
|
|
1542 my $genomic_aligns = $self->get_all_GenomicAligns;
|
|
1543 if (!@$genomic_aligns) {
|
|
1544 throw "Bio::EnsEMBL::Compara::GenomicAlignBlock ($self) does not have any associated".
|
|
1545 " Bio::EnsEMBL::Compara::GenomicAlign";
|
|
1546 }
|
|
1547
|
|
1548 if (scalar(@{$genomic_aligns}) != 2) {
|
|
1549 throw "Trying to get old_consensus_genomic_align from Bio::EnsEMBL::Compara::GenomicAlignBlock".
|
|
1550 " ($self) holding a multiple alignment";
|
|
1551 }
|
|
1552
|
|
1553 if ($genomic_aligns->[0]->dnafrag->genome_db->dbID > $genomic_aligns->[1]->dnafrag->genome_db->dbID) {
|
|
1554 return $genomic_aligns->[1];
|
|
1555
|
|
1556 } elsif ($genomic_aligns->[0]->dnafrag->genome_db->dbID < $genomic_aligns->[1]->dnafrag->genome_db->dbID) {
|
|
1557 return $genomic_aligns->[0];
|
|
1558
|
|
1559 ## If they belongs to the same genome_db, use the dnafrag_id instead
|
|
1560 } elsif ($genomic_aligns->[0]->dnafrag->dbID > $genomic_aligns->[1]->dnafrag->dbID) {
|
|
1561 return $genomic_aligns->[1];
|
|
1562
|
|
1563 } elsif ($genomic_aligns->[0]->dnafrag->dbID < $genomic_aligns->[1]->dnafrag->dbID) {
|
|
1564 return $genomic_aligns->[0];
|
|
1565
|
|
1566 ## If they belongs to the same genome_db and dnafrag, use the dnafrag_start instead
|
|
1567 } elsif ($genomic_aligns->[0]->dnafrag_start > $genomic_aligns->[1]->dnafrag_start) {
|
|
1568 return $genomic_aligns->[1];
|
|
1569
|
|
1570 } elsif ($genomic_aligns->[0]->dnafrag_start < $genomic_aligns->[1]->dnafrag_start) {
|
|
1571 return $genomic_aligns->[0];
|
|
1572
|
|
1573 ## If they belongs to the same genome_db and dnafrag and have the same danfrag_start, use the dnafrag_end instead
|
|
1574 } elsif ($genomic_aligns->[0]->dnafrag_end > $genomic_aligns->[1]->dnafrag_end) {
|
|
1575 return $genomic_aligns->[1];
|
|
1576
|
|
1577 } elsif ($genomic_aligns->[0]->dnafrag_end < $genomic_aligns->[1]->dnafrag_end) {
|
|
1578 return $genomic_aligns->[0];
|
|
1579
|
|
1580 ## If everithing else fails, use the dnafrag_strand
|
|
1581 } elsif ($genomic_aligns->[0]->dnafrag_strand > $genomic_aligns->[1]->dnafrag_strand) {
|
|
1582 return $genomic_aligns->[1];
|
|
1583
|
|
1584 } elsif ($genomic_aligns->[0]->dnafrag_strand < $genomic_aligns->[1]->dnafrag_strand) {
|
|
1585 return $genomic_aligns->[0];
|
|
1586
|
|
1587 # Whatever, they are the same. Use 0 for consensus and 1 for query
|
|
1588 } else {
|
|
1589 return $genomic_aligns->[0];
|
|
1590 }
|
|
1591 }
|
|
1592
|
|
1593
|
|
1594 =head2 get_old_query_genomic_align [FOR BACKWARDS COMPATIBILITY ONLY]
|
|
1595
|
|
1596 Arg [1] : none
|
|
1597 Example : $old_query_genomic_aligns = $genomic_align_group->get_old_query_genomic_align();
|
|
1598 Description: get the Bio::EnsEMBL::Compara::GenomicAlign object following the convention for backwards
|
|
1599 compatibility
|
|
1600 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
|
|
1601 Exceptions :
|
|
1602 Caller : general
|
|
1603
|
|
1604 =cut
|
|
1605
|
|
1606 sub get_old_query_genomic_align {
|
|
1607 my ($self) = @_;
|
|
1608
|
|
1609 my $genomic_aligns = $self->get_all_GenomicAligns;
|
|
1610 if (!@$genomic_aligns) {
|
|
1611 throw "Bio::EnsEMBL::Compara::GenomicAlignBlock ($self) does not have any associated".
|
|
1612 " Bio::EnsEMBL::Compara::GenomicAlign";
|
|
1613 }
|
|
1614
|
|
1615 if (scalar(@{$genomic_aligns}) != 2) {
|
|
1616 throw "Trying to get old_consensus_genomic_align from Bio::EnsEMBL::Compara::GenomicAlignBlock".
|
|
1617 " ($self) holding a multiple alignment";
|
|
1618 }
|
|
1619
|
|
1620 if ($genomic_aligns->[0]->dnafrag->genome_db->dbID > $genomic_aligns->[1]->dnafrag->genome_db->dbID) {
|
|
1621 return $genomic_aligns->[0];
|
|
1622
|
|
1623 } elsif ($genomic_aligns->[0]->dnafrag->genome_db->dbID < $genomic_aligns->[1]->dnafrag->genome_db->dbID) {
|
|
1624 return $genomic_aligns->[1];
|
|
1625
|
|
1626 ## If they belongs to the same genome_db, use the dnafrag_id instead
|
|
1627 } elsif ($genomic_aligns->[0]->dnafrag->dbID > $genomic_aligns->[1]->dnafrag->dbID) {
|
|
1628 return $genomic_aligns->[0];
|
|
1629
|
|
1630 } elsif ($genomic_aligns->[0]->dnafrag->dbID < $genomic_aligns->[1]->dnafrag->dbID) {
|
|
1631 return $genomic_aligns->[1];
|
|
1632
|
|
1633 ## If they belongs to the same genome_db and dnafrag, use the dnafrag_start instead
|
|
1634 } elsif ($genomic_aligns->[0]->dnafrag_start > $genomic_aligns->[1]->dnafrag_start) {
|
|
1635 return $genomic_aligns->[0];
|
|
1636
|
|
1637 } elsif ($genomic_aligns->[0]->dnafrag_start < $genomic_aligns->[1]->dnafrag_start) {
|
|
1638 return $genomic_aligns->[1];
|
|
1639
|
|
1640 ## If they belongs to the same genome_db and dnafrag and have the same danfrag_start, use the dnafrag_end instead
|
|
1641 } elsif ($genomic_aligns->[0]->dnafrag_end > $genomic_aligns->[1]->dnafrag_end) {
|
|
1642 return $genomic_aligns->[0];
|
|
1643
|
|
1644 } elsif ($genomic_aligns->[0]->dnafrag_end < $genomic_aligns->[1]->dnafrag_end) {
|
|
1645 return $genomic_aligns->[1];
|
|
1646
|
|
1647 ## If everithing else fails, use the dnafrag_strand
|
|
1648 } elsif ($genomic_aligns->[0]->dnafrag_strand > $genomic_aligns->[1]->dnafrag_strand) {
|
|
1649 return $genomic_aligns->[0];
|
|
1650
|
|
1651 } elsif ($genomic_aligns->[0]->dnafrag_strand < $genomic_aligns->[1]->dnafrag_strand) {
|
|
1652 return $genomic_aligns->[1];
|
|
1653
|
|
1654 # Whatever, they are the same. Use 0 for consensus and 1 for query
|
|
1655 } else {
|
|
1656 return $genomic_aligns->[1];
|
|
1657 }
|
|
1658 }
|
|
1659
|
|
1660 1;
|