0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::EnsEMBL::Compara::GenomicAlign - Defines one of the sequences involved in a genomic alignment
|
|
22
|
|
23 =head1 SYNOPSIS
|
|
24
|
|
25 use Bio::EnsEMBL::Compara::GenomicAlign;
|
|
26 my $genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign(
|
|
27 -adaptor => $genomic_align_adaptor,
|
|
28 -genomic_align_block => $genomic_align_block,
|
|
29 -method_link_species_set => $method_link_species_set,
|
|
30 -dnafrag => $dnafrag,
|
|
31 -dnafrag_start => 100001,
|
|
32 -dnafrag_end => 100050,
|
|
33 -dnafrag_strand => -1,
|
|
34 -aligned_sequence => "TTGCAGGTAGGCCATCTGCAAGC----TGAGGAGCAAGGACTCCAGTCGGAGTC"
|
|
35 -visible => 1,
|
|
36 );
|
|
37
|
|
38
|
|
39 SET VALUES
|
|
40 $genomic_align->adaptor($adaptor);
|
|
41 $genomic_align->dbID(12);
|
|
42 $genomic_align->genomic_align_block($genomic_align_block);
|
|
43 $genomic_align->genomic_align_block_id(1032);
|
|
44 $genomic_align->method_link_species_set($method_link_species_set);
|
|
45 $genomic_align->method_link_species_set_id(3);
|
|
46 $genomic_align->dnafrag($dnafrag);
|
|
47 $genomic_align->dnafrag_id(134);
|
|
48 $genomic_align->dnafrag_start(100001);
|
|
49 $genomic_align->dnafrag_end(100050);
|
|
50 $genomic_align->dnafrag_strand(-1);
|
|
51 $genomic_align->aligned_sequence("TTGCAGGTAGGCCATCTGCAAGC----TGAGGAGCAAGGACTCCAGTCGGAGTC");
|
|
52 $genomic_align->original_sequence("TTGCAGGTAGGCCATCTGCAAGCTGAGGAGCAAGGACTCCAGTCGGAGTC");
|
|
53 $genomic_align->cigar_line("23M4D27M");
|
|
54 $genomic_align->visible(1);
|
|
55
|
|
56 GET VALUES
|
|
57 $adaptor = $genomic_align->adaptor;
|
|
58 $dbID = $genomic_align->dbID;
|
|
59 $genomic_align_block = $genomic_align->genomic_block;
|
|
60 $genomic_align_block_id = $genomic_align->genomic_align_block_id;
|
|
61 $method_link_species_set = $genomic_align->method_link_species_set;
|
|
62 $method_link_species_set_id = $genomic_align->method_link_species_set_id;
|
|
63 $dnafrag = $genomic_align->dnafrag;
|
|
64 $dnafrag_id = $genomic_align->dnafrag_id;
|
|
65 $dnafrag_start = $genomic_align->dnafrag_start;
|
|
66 $dnafrag_end = $genomic_align->dnafrag_end;
|
|
67 $dnafrag_strand = $genomic_align->dnafrag_strand;
|
|
68 $aligned_sequence = $genomic_align->aligned_sequence;
|
|
69 $original_sequence = $genomic_align->original_sequence;
|
|
70 $cigar_line = $genomic_align->cigar_line;
|
|
71 $visible = $genomic_align->visible;
|
|
72 $slice = $genomic_align->get_Slice();
|
|
73
|
|
74 =head1 DESCRIPTION
|
|
75
|
|
76 The GenomicAlign object stores information about a single sequence within an alignment.
|
|
77
|
|
78 =head1 OBJECT ATTRIBUTES
|
|
79
|
|
80 =over
|
|
81
|
|
82 =item dbID
|
|
83
|
|
84 corresponds to genomic_align.genomic_align_id
|
|
85
|
|
86 =item adaptor
|
|
87
|
|
88 Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor object to access DB
|
|
89
|
|
90 =item genomic_align_block_id
|
|
91
|
|
92 corresponds to genomic_align_block.genomic_align_block_id (ext. reference)
|
|
93
|
|
94 =item genomic_align_block
|
|
95
|
|
96 Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlock object corresponding to genomic_align_block_id
|
|
97
|
|
98 =item method_link_species_set_id
|
|
99
|
|
100 corresponds to method_link_species_set.method_link_species_set_id (external ref.)
|
|
101
|
|
102 =item method_link_species_set
|
|
103
|
|
104 Bio::EnsEMBL::Compara::DBSQL::MethodLinkSpeciesSet object corresponding to method_link_species_set_id
|
|
105
|
|
106 =item dnafrag_id
|
|
107
|
|
108 corresponds to dnafrag.dnafrag_id (external ref.)
|
|
109
|
|
110 =item dnafrag
|
|
111
|
|
112 Bio::EnsEMBL::Compara::DnaFrag object corresponding to dnafrag_id
|
|
113
|
|
114 =item dnafrag_start
|
|
115
|
|
116 corresponds to genomic_align.dnafrag_start
|
|
117
|
|
118 =item dnafrag_end
|
|
119
|
|
120 corresponds to genomic_align.dnafrag_end
|
|
121
|
|
122 =item dnafrag_strand
|
|
123
|
|
124 corresponds to genomic_align.dnafrag_strand
|
|
125
|
|
126 =item cigar_line
|
|
127
|
|
128 corresponds to genomic_align.cigar_line
|
|
129
|
|
130 =item visible
|
|
131
|
|
132 corresponds to genomic_align.visible
|
|
133
|
|
134 =item aligned_sequence
|
|
135
|
|
136 corresponds to the sequence rebuilt using dnafrag and cigar_line
|
|
137
|
|
138 =item original_sequence
|
|
139
|
|
140 corresponds to the original sequence. It can be rebuilt from the aligned_sequence, the dnafrag object or can be used
|
|
141 in conjuction with cigar_line to get the aligned_sequence.
|
|
142
|
|
143 =back
|
|
144
|
|
145 =head1 APPENDIX
|
|
146
|
|
147 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
148
|
|
149 =cut
|
|
150
|
|
151
|
|
152 # Let the code begin...
|
|
153
|
|
154
|
|
155 package Bio::EnsEMBL::Compara::GenomicAlign;
|
|
156 use strict;
|
|
157
|
|
158 use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate verbose);
|
|
159 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
|
|
160 use Scalar::Util qw(weaken);
|
|
161 use Bio::EnsEMBL::Compara::BaseGenomicAlignSet;
|
|
162 use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
|
|
163 use Bio::EnsEMBL::Mapper;
|
|
164
|
|
165
|
|
166 =head2 new (CONSTRUCTOR)
|
|
167
|
|
168 Arg [-DBID] : (opt.) int $dbID (the database internal ID for this object)
|
|
169 Arg [-ADAPTOR]
|
|
170 : (opt.) Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor $adaptor
|
|
171 (the adaptor for connecting to the database)
|
|
172 Arg [-GENOMIC_ALIGN_BLOCK]
|
|
173 : (opt.) Bio::EnsEMBL::Compara::GenomicAlignBlock $genomic_align_block
|
|
174 (the block to which this Bio::EnsEMBL::Compara::GenomicAlign object
|
|
175 belongs to)
|
|
176 Arg [-GENOMIC_ALIGN_BLOCK_ID]
|
|
177 : (opt.) int $genomic_align_block_id (the database internal ID for the
|
|
178 $genomic_align_block)
|
|
179 Arg [-METHOD_LINK_SPECIES_SET]
|
|
180 : (opt.) Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $mlss
|
|
181 (this defines the type of alignment and the set of species used
|
|
182 to get this GenomicAlignBlock)
|
|
183 Arg [-METHOD_LINK_SPECIES_SET_ID]
|
|
184 : (opt.) int $mlss_id (the database internal ID for the $mlss)
|
|
185 Arg [-DNAFRAG]
|
|
186 : (opt.) Bio::EnsEMBL::Compara::DnaFrag $dnafrag (the genomic
|
|
187 sequence object to which this object refers to)
|
|
188 Arg [-DNAFRAG_ID]
|
|
189 : (opt.) int $dnafrag_id (the database internal ID for the $dnafrag)
|
|
190 Arg [-DNAFRAG_START]
|
|
191 : (opt.) int $dnafrag_start (the starting position of this
|
|
192 Bio::EnsEMBL::Compara::GenomicAling within its corresponding $dnafrag)
|
|
193 Arg [-DNAFRAG_END]
|
|
194 : (opt.) int $dnafrag_end (the ending position of this
|
|
195 Bio::EnsEMBL::Compara::GenomicAling within its corresponding $dnafrag)
|
|
196 Arg [-DNAFRAG_STRAND]
|
|
197 : (opt.) int $dnafrag_strand (1 or -1; defines in which strand of its
|
|
198 corresponding $dnafrag this Bio::EnsEMBL::Compara::GenomicAlign is)
|
|
199 Arg [-ALIGNED_SEQUENCE]
|
|
200 : (opt.) string $aligned_sequence (the sequence of this object, including
|
|
201 gaps and all)
|
|
202 Arg [-CIGAR_LINE]
|
|
203 : (opt.) string $cigar_line (a compressed way of representing the indels in
|
|
204 the $aligned_sequence of this object)
|
|
205 Arg [-VISIBLE]
|
|
206 : (opt.) int $visible. Used in self alignments to ensure only one Bio::EnsEMBL::Compara::GenomicAlignBlock is visible when you have more than 1 block covering the same region.
|
|
207 Example : my $genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign(
|
|
208 -adaptor => $genomic_align_adaptor,
|
|
209 -genomic_align_block => $genomic_align_block,
|
|
210 -method_link_species_set => $method_link_species_set,
|
|
211 -dnafrag => $dnafrag,
|
|
212 -dnafrag_start => 100001,
|
|
213 -dnafrag_end => 100050,
|
|
214 -dnafrag_strand => -1,
|
|
215 -aligned_sequence => "TTGCAGGTAGGCCATCTGCAAGC----TGAGGAGCAAGGACTCCAGTCGGAGTC"
|
|
216 -visible => 1,
|
|
217 );
|
|
218 Description : Creates a new Bio::EnsEMBL::Compara::GenomicAlign object
|
|
219 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
|
|
220 Exceptions : none
|
|
221 Caller : general
|
|
222 Status : Stable
|
|
223
|
|
224 =cut
|
|
225
|
|
226 sub new {
|
|
227 my($class, @args) = @_;
|
|
228
|
|
229 my $self = {};
|
|
230 bless $self,$class;
|
|
231
|
|
232 my ($cigar_line, $adaptor,
|
|
233 $dbID, $genomic_align_block, $genomic_align_block_id, $method_link_species_set,
|
|
234 $method_link_species_set_id, $dnafrag, $dnafrag_id,
|
|
235 $dnafrag_start, $dnafrag_end, $dnafrag_strand,
|
|
236 $aligned_sequence, $visible, $node_id ) =
|
|
237
|
|
238 rearrange([qw(
|
|
239 CIGAR_LINE ADAPTOR
|
|
240 DBID GENOMIC_ALIGN_BLOCK GENOMIC_ALIGN_BLOCK_ID METHOD_LINK_SPECIES_SET
|
|
241 METHOD_LINK_SPECIES_SET_ID DNAFRAG DNAFRAG_ID
|
|
242 DNAFRAG_START DNAFRAG_END DNAFRAG_STRAND
|
|
243 ALIGNED_SEQUENCE VISIBLE NODE_ID)], @args);
|
|
244
|
|
245 $self->adaptor( $adaptor ) if defined $adaptor;
|
|
246 $self->cigar_line( $cigar_line ) if defined $cigar_line;
|
|
247
|
|
248 $self->dbID($dbID) if (defined($dbID));
|
|
249 $self->genomic_align_block($genomic_align_block) if (defined($genomic_align_block));
|
|
250 $self->genomic_align_block_id($genomic_align_block_id) if (defined($genomic_align_block_id));
|
|
251 $self->method_link_species_set($method_link_species_set) if (defined($method_link_species_set));
|
|
252 $self->method_link_species_set_id($method_link_species_set_id) if (defined($method_link_species_set_id));
|
|
253 $self->dnafrag($dnafrag) if (defined($dnafrag));
|
|
254 $self->dnafrag_id($dnafrag_id) if (defined($dnafrag_id));
|
|
255 $self->dnafrag_start($dnafrag_start) if (defined($dnafrag_start));
|
|
256 $self->dnafrag_end($dnafrag_end) if (defined($dnafrag_end));
|
|
257 $self->dnafrag_strand($dnafrag_strand) if (defined($dnafrag_strand));
|
|
258 $self->aligned_sequence($aligned_sequence) if (defined($aligned_sequence));
|
|
259 $self->visible($visible) if (defined($visible));
|
|
260 $self->node_id($node_id) if (defined($node_id));
|
|
261
|
|
262 return $self;
|
|
263 }
|
|
264
|
|
265 =head2 new_fast
|
|
266
|
|
267 Arg [1] : hash reference $hashref
|
|
268 Example : none
|
|
269 Description: This is an ultra fast constructor which requires knowledge of
|
|
270 the objects internals to be used.
|
|
271 Returntype :
|
|
272 Exceptions : none
|
|
273 Caller :
|
|
274 Status : Stable
|
|
275
|
|
276 =cut
|
|
277
|
|
278 sub new_fast {
|
|
279 my $class = shift;
|
|
280 my $hashref = shift;
|
|
281
|
|
282 return bless $hashref, $class;
|
|
283 }
|
|
284
|
|
285
|
|
286 =head2 copy (CONSTRUCTOR)
|
|
287
|
|
288 Arg : -none-
|
|
289 Example : my $new_genomic_align = $genomic_align->copy();
|
|
290 Description : Create a new object with the same attributes
|
|
291 as this one.
|
|
292 Returntype : Bio::EnsEMBL::Compara::GenomicAlign (or subclassed) object
|
|
293 Exceptions :
|
|
294 Status : Stable
|
|
295
|
|
296 =cut
|
|
297
|
|
298 sub copy {
|
|
299 my ($self) = @_;
|
|
300 my $new_copy = {};
|
|
301 bless $new_copy, ref($self);
|
|
302
|
|
303 while (my ($key, $value) = each %$self) {
|
|
304 $new_copy->{$key} = $value;
|
|
305 }
|
|
306
|
|
307 return $new_copy;
|
|
308 }
|
|
309
|
|
310
|
|
311 =head2 adaptor
|
|
312
|
|
313 Arg [1] : Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor
|
|
314 Example : $adaptor = $genomic_align->adaptor;
|
|
315 Example : $genomic_align->adaptor($adaptor);
|
|
316 Description: Getter/Setter for the adaptor this object uses for database
|
|
317 interaction.
|
|
318 Returntype : Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor
|
|
319 Exceptions : thrown if $adaptor is not a
|
|
320 Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor object
|
|
321 Caller : object->methodname
|
|
322 Status : Stable
|
|
323
|
|
324 =cut
|
|
325
|
|
326 sub adaptor {
|
|
327 my $self = shift;
|
|
328
|
|
329 if (@_) {
|
|
330 $self->{'adaptor'} = shift;
|
|
331 throw($self->{'adaptor'}." is not a Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor object")
|
|
332 if ($self->{'adaptor'} and !UNIVERSAL::isa($self->{'adaptor'}, "Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor"));
|
|
333 }
|
|
334
|
|
335 return $self->{'adaptor'};
|
|
336 }
|
|
337
|
|
338
|
|
339 =head2 dbID
|
|
340
|
|
341 Arg [1] : integer $dbID
|
|
342 Example : $dbID = $genomic_align->dbID;
|
|
343 Example : $genomic_align->dbID(12);
|
|
344 Description: Getter/Setter for the attribute dbID
|
|
345 Returntype : integer
|
|
346 Exceptions : none
|
|
347 Caller : object->methodname
|
|
348 Status : Stable
|
|
349
|
|
350 =cut
|
|
351
|
|
352 sub dbID {
|
|
353 my ($self, $dbID) = @_;
|
|
354
|
|
355 if (defined($dbID)) {
|
|
356 $self->{'dbID'} = $dbID;
|
|
357 }
|
|
358
|
|
359 return $self->{'dbID'};
|
|
360 }
|
|
361
|
|
362
|
|
363 =head2 genomic_align_block
|
|
364
|
|
365 Arg [1] : Bio::EnsEMBL::Compara::GenomicAlignBlock $genomic_align_block
|
|
366 Example : $genomic_align_block = $genomic_align->genomic_align_block;
|
|
367 Example : $genomic_align->genomic_align_block($genomic_align_block);
|
|
368 Description: Getter/Setter for the attribute genomic_align_block
|
|
369 Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object. If no
|
|
370 argument is given, the genomic_align_block is not defined but
|
|
371 both the genomic_align_block_id and the adaptor are, it tries
|
|
372 to fetch the data using the genomic_align_block_id.
|
|
373 Exception : throws if $genomic_align_block is not a
|
|
374 Bio::EnsEMBL::Compara::GenomicAlignBlock object or if
|
|
375 $genomic_align_block does not match a previously defined
|
|
376 genomic_align_block_id
|
|
377 Warning : warns if getting data from other sources fails.
|
|
378 Caller : object->methodname
|
|
379 Status : Stable
|
|
380
|
|
381 =cut
|
|
382
|
|
383 sub genomic_align_block {
|
|
384 my ($self, $genomic_align_block) = @_;
|
|
385
|
|
386 if (defined($genomic_align_block)) {
|
|
387 throw("$genomic_align_block is not a Bio::EnsEMBL::Compara::BaseGenomicAlignSet object")
|
|
388 if (!$genomic_align_block->isa("Bio::EnsEMBL::Compara::BaseGenomicAlignSet"));
|
|
389 weaken($self->{'genomic_align_block'} = $genomic_align_block);
|
|
390
|
|
391 ## Add adaptor to genomic_align_block object if possible and needed
|
|
392 if (!defined($genomic_align_block->{'adaptor'}) and !defined($genomic_align_block->{'_adaptor'}) and defined($self->{'adaptor'})) {
|
|
393 $genomic_align_block->adaptor($self->adaptor->db->get_GenomicAlignBlockAdaptor);
|
|
394 }
|
|
395
|
|
396 if ($genomic_align_block->isa("Bio::EnsEMBL::Compara::GenomicAlignBlock")) {
|
|
397 if ($self->{'genomic_align_block_id'}) {
|
|
398 if (!$self->{'genomic_align_block'}->{'dbID'}) {
|
|
399 $self->{'genomic_align_block'}->dbID($self->{'genomic_align_block_id'});
|
|
400 }
|
|
401 # warning("Defining both genomic_align_block_id and genomic_align_block");
|
|
402 throw("dbID of genomic_align_block object does not match previously defined".
|
|
403 " genomic_align_block_id. If you want to override a".
|
|
404 " Bio::EnsEMBL::Compara::GenomicAlign object, you can reset the ".
|
|
405 "genomic_align_block_id using \$genomic_align->genomic_align_block_id(0)")
|
|
406 if ($self->{'genomic_align_block'}->{'dbID'} != $self->{'genomic_align_block_id'});
|
|
407 } else {
|
|
408 $self->{'genomic_align_block_id'} = $genomic_align_block->{'dbID'};
|
|
409 }
|
|
410 }
|
|
411
|
|
412 } elsif (!defined($self->{'genomic_align_block'})) {
|
|
413 # Try to get the genomic_align_block from other sources...
|
|
414 if (defined($self->genomic_align_block_id) and defined($self->{'adaptor'})) {
|
|
415 # ...from the genomic_align_block_id. Uses genomic_align_block_id function
|
|
416 # and not the attribute in the <if> clause because the attribute can be retrieved from other
|
|
417 # sources if it has not been set before.
|
|
418 my $genomic_align_block_adaptor = $self->{'adaptor'}->db->get_GenomicAlignBlockAdaptor;
|
|
419 $self->{'genomic_align_block'} = $genomic_align_block_adaptor->fetch_by_dbID(
|
|
420 $self->{'genomic_align_block_id'});
|
|
421 } else {
|
|
422 # warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_block".
|
|
423 # " You either have to specify more information (see perldoc for".
|
|
424 # " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
425 }
|
|
426 }
|
|
427
|
|
428 return $self->{'genomic_align_block'};
|
|
429 }
|
|
430
|
|
431
|
|
432 =head2 genomic_align_block_id
|
|
433
|
|
434 Arg [1] : integer $genomic_align_block_id
|
|
435 Example : $genomic_align_block_id = $genomic_align->genomic_align_block_id;
|
|
436 Example : $genomic_align->genomic_align_block_id(1032);
|
|
437 Description: Getter/Setter for the attribute genomic_align_block_id. If no
|
|
438 argument is given and the genomic_align_block_id is not defined, it
|
|
439 tries to get the data from other sources like the corresponding
|
|
440 Bio::EnsEMBL::Compara::GenomicAlignBlock object or the database using
|
|
441 the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
442 Use 0 as argument to clear this attribute.
|
|
443 Returntype : integer
|
|
444 Exceptions : thrown if $genomic_align_block_id does not match a previously defined
|
|
445 genomic_align_block
|
|
446 Warning : warns if getting data from other sources fails.
|
|
447 Caller : object->methodname
|
|
448 Status : Stable
|
|
449
|
|
450 =cut
|
|
451
|
|
452 sub genomic_align_block_id {
|
|
453 my ($self, $genomic_align_block_id) = @_;
|
|
454
|
|
455 if (defined($genomic_align_block_id)) {
|
|
456
|
|
457 $self->{'genomic_align_block_id'} = ($genomic_align_block_id or undef);
|
|
458 if (defined($self->{'genomic_align_block'}) and $self->{'genomic_align_block_id'}) {
|
|
459 # warning("Defining both genomic_align_block_id and genomic_align_block");
|
|
460 throw("genomic_align_block_id does not match previously defined genomic_align_block object")
|
|
461 if ($self->{'genomic_align_block'} and
|
|
462 $self->{'genomic_align_block'}->dbID != $self->{'genomic_align_block_id'});
|
|
463 }
|
|
464 } elsif (!($self->{'genomic_align_block_id'})) {
|
|
465 # Try to get the ID from other sources...
|
|
466 if (defined($self->{'genomic_align_block'})) {
|
|
467 if ($self->{genomic_align_block}->isa("Bio::EnsEMBL::Compara::GenomicAlignBlock")and defined($self->{'genomic_align_block'}->dbID)) {
|
|
468 # ...from the corresponding Bio::EnsEMBL::Compara::GenomicAlignBlock object
|
|
469 $self->{'genomic_align_block_id'} = $self->{'genomic_align_block'}->dbID;
|
|
470 }
|
|
471 } elsif (defined($self->{'adaptor'}) and defined($self->{'dbID'})) {
|
|
472 # ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
473 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
474 } else {
|
|
475 # warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_block_id".
|
|
476 # " You either have to specify more information (see perldoc for".
|
|
477 # " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
478 }
|
|
479 }
|
|
480
|
|
481 return $self->{'genomic_align_block_id'};
|
|
482 }
|
|
483
|
|
484
|
|
485 =head2 method_link_species_set
|
|
486
|
|
487 Arg [1] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
|
|
488 Example : $method_link_species_set = $genomic_align->method_link_species_set;
|
|
489 Example : $genomic_align->method_link_species_set($method_link_species_set);
|
|
490 Description: Getter/Setter for the attribute method_link_species_set. If no
|
|
491 argument is given and the method_link_species_set is not defined, it
|
|
492 tries to get the data from other sources like the corresponding
|
|
493 Bio::EnsEMBL::Compara::GenomicAlignBlock object or from
|
|
494 the method_link_species_set_id.
|
|
495 Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
|
|
496 Exceptions : thrown if $method_link_species_set is not a
|
|
497 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object or if
|
|
498 $method_link_species_set does not match a previously defined
|
|
499 method_link_species_set_id
|
|
500 Warning : warns if getting data from other sources fails.
|
|
501 Caller : object->methodname
|
|
502 Status : Stable
|
|
503
|
|
504 =cut
|
|
505
|
|
506 sub method_link_species_set {
|
|
507 my ($self, $method_link_species_set) = @_;
|
|
508
|
|
509 if (defined($method_link_species_set)) {
|
|
510 throw("$method_link_species_set is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
|
|
511 if (!$method_link_species_set->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
|
|
512 $self->{'method_link_species_set'} = $method_link_species_set;
|
|
513 if ($self->{'method_link_species_set_id'}) {
|
|
514 if (!$self->{'method_link_species_set'}->dbID) {
|
|
515 $self->{'method_link_species_set'}->dbID($self->{'method_link_species_set_id'});
|
|
516 } else {
|
|
517 $self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID();
|
|
518 }
|
|
519 } else {
|
|
520 $self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID;
|
|
521 }
|
|
522
|
|
523 } elsif (!defined($self->{'method_link_species_set'})) {
|
|
524 # Try to get the object from other sources...
|
|
525 if (defined($self->genomic_align_block) and ($self->{'genomic_align_block'}->method_link_species_set)) {
|
|
526 # ...from the corresponding Bio::EnsEMBL::Compara::GenomicAlignBlock object. Uses genomic_align_block
|
|
527 # function and not the attribute in the <if> clause because the attribute can be retrieved from other
|
|
528 # sources if it has not been already defined.
|
|
529 $self->{'method_link_species_set'} = $self->genomic_align_block->method_link_species_set;
|
|
530 } elsif (defined($self->method_link_species_set_id) and defined($self->{'adaptor'})) {
|
|
531 # ...from the method_link_species_set_id. Uses method_link_species_set_id function and not the attribute
|
|
532 # in the <if> clause because the attribute can be retrieved from other sources if it has not been
|
|
533 # already defined.
|
|
534 my $method_link_species_set_adaptor = $self->adaptor->db->get_MethodLinkSpeciesSetAdaptor;
|
|
535 $self->{'method_link_species_set'} = $method_link_species_set_adaptor->fetch_by_dbID(
|
|
536 $self->{'method_link_species_set_id'});
|
|
537 } else {
|
|
538 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->method_link_species_set".
|
|
539 " You either have to specify more information (see perldoc for".
|
|
540 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
541 }
|
|
542 }
|
|
543
|
|
544 return $self->{'method_link_species_set'};
|
|
545 }
|
|
546
|
|
547
|
|
548 =head2 method_link_species_set_id
|
|
549
|
|
550 Arg [1] : integer $method_link_species_set_id
|
|
551 Example : $method_link_species_set_id = $genomic_align->method_link_species_set_id;
|
|
552 Example : $genomic_align->method_link_species_set_id(3);
|
|
553 Description: Getter/Setter for the attribute method_link_species_set_id. If no
|
|
554 argument is given and the method_link_species_set_id is not defined, it
|
|
555 tries to get the data from other sources like the corresponding
|
|
556 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object or the database
|
|
557 using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
558 Use 0 as argument to clear this attribute.
|
|
559 Returntype : integer
|
|
560 Exceptions : thrown if $method_link_species_set_id does not match a previously defined
|
|
561 method_link_species_set
|
|
562 Warning : warns if getting data from other sources fails.
|
|
563 Caller : object->methodname
|
|
564 Status : Stable
|
|
565
|
|
566 =cut
|
|
567
|
|
568 sub method_link_species_set_id {
|
|
569 my ($self, $method_link_species_set_id) = @_;
|
|
570
|
|
571 if (defined($method_link_species_set_id)) {
|
|
572 $self->{'method_link_species_set_id'} = $method_link_species_set_id;
|
|
573 if (defined($self->{'method_link_species_set'}) and $self->{'method_link_species_set_id'}) {
|
|
574 $self->{'method_link_species_set'} = undef;
|
|
575 }
|
|
576 } elsif (!$self->{'method_link_species_set_id'}) {
|
|
577 # Try to get the ID from other sources...
|
|
578 if (defined($self->{'method_link_species_set'}) and $self->{'method_link_species_set'}->dbID) {
|
|
579 # ...from the corresponding Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
|
|
580 $self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID;
|
|
581 } elsif (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
582 # ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
583 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
584 } else {
|
|
585 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->method_link_species_set_id".
|
|
586 " You either have to specify more information (see perldoc for".
|
|
587 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
588 }
|
|
589 }
|
|
590
|
|
591 return $self->{'method_link_species_set_id'};
|
|
592 }
|
|
593
|
|
594
|
|
595 =head2 genome_db
|
|
596
|
|
597 Arg [1] : Bio::EnsEMBL::Compara::GenomeDB $genome_db
|
|
598 Example : $genome_db = $genomic_align->genome_db;
|
|
599 Example : $genomic_align->genome_db($genome_db);
|
|
600 Description: Getter/Setter for the attribute genome_db of
|
|
601 the dnafrag. This method is a short cut for
|
|
602 $genomic_align->dnafrag->genome_db()
|
|
603 Returntype : Bio::EnsEMBL::Compara::DnaFrag object
|
|
604 Exceptions : thrown if $genomic_align->dnafrag is not
|
|
605 defined and cannot be fetched from other
|
|
606 sources.
|
|
607 Caller : object->methodname
|
|
608 Status : Stable
|
|
609
|
|
610 =cut
|
|
611
|
|
612 sub genome_db {
|
|
613 my ($self, $genome_db) = @_;
|
|
614
|
|
615 if (defined($genome_db)) {
|
|
616 throw("$genome_db is not a Bio::EnsEMBL::Compara::GenomeDB object")
|
|
617 if (!UNIVERSAL::isa($genome_db, "Bio::EnsEMBL::Compara::GenomeDB"));
|
|
618 my $dnafrag = $self->dnafrag();
|
|
619 if (!$dnafrag) {
|
|
620 throw("Cannot set genome_db if dnafrag does not exist");
|
|
621 } else {
|
|
622 $self->dnafrag->genome_db($genome_db);
|
|
623 }
|
|
624 }
|
|
625 return $self->dnafrag->genome_db;
|
|
626 }
|
|
627
|
|
628
|
|
629 =head2 dnafrag
|
|
630
|
|
631 Arg [1] : Bio::EnsEMBL::Compara::DnaFrag $dnafrag
|
|
632 Example : $dnafrag = $genomic_align->dnafrag;
|
|
633 Example : $genomic_align->dnafrag($dnafrag);
|
|
634 Description: Getter/Setter for the attribute dnafrag. If no
|
|
635 argument is given, the dnafrag is not defined but
|
|
636 both the dnafrag_id and the adaptor are, it tries
|
|
637 to fetch the data using the dnafrag_id
|
|
638 Returntype : Bio::EnsEMBL::Compara::DnaFrag object
|
|
639 Exceptions : thrown if $dnafrag is not a Bio::EnsEMBL::Compara::DnaFrag
|
|
640 object or if $dnafrag does not match a previously defined
|
|
641 dnafrag_id
|
|
642 Warning : warns if getting data from other sources fails.
|
|
643 Caller : object->methodname
|
|
644 Status : Stable
|
|
645
|
|
646 =cut
|
|
647
|
|
648 sub dnafrag {
|
|
649 my ($self, $dnafrag) = @_;
|
|
650
|
|
651 if (defined($dnafrag)) {
|
|
652 throw("$dnafrag is not a Bio::EnsEMBL::Compara::DnaFrag object")
|
|
653 if (!$dnafrag->isa("Bio::EnsEMBL::Compara::DnaFrag"));
|
|
654 $self->{'dnafrag'} = $dnafrag;
|
|
655 if ($self->{'dnafrag_id'}) {
|
|
656 if (!$self->{'dnafrag'}->dbID) {
|
|
657 $self->{'dnafrag'}->dbID($self->{'dnafrag_id'});
|
|
658 }
|
|
659 # warning("Defining both dnafrag_id and dnafrag");
|
|
660 throw("dnafrag object does not match previously defined dnafrag_id")
|
|
661 if ($self->{'dnafrag'}->dbID != $self->{'dnafrag_id'});
|
|
662 } else {
|
|
663 $self->{'dnafrag_id'} = $self->{'dnafrag'}->dbID;
|
|
664 }
|
|
665
|
|
666 } elsif (!defined($self->{'dnafrag'})) {
|
|
667
|
|
668 # Try to get data from other sources...
|
|
669 if (defined($self->dnafrag_id) and defined($self->{'adaptor'})) {
|
|
670 # ...from the dnafrag_id. Use dnafrag_id function and not the attribute in the <if>
|
|
671 # clause because the attribute can be retrieved from other sources if it has not been already defined.
|
|
672 my $dnafrag_adaptor = $self->adaptor->db->get_DnaFragAdaptor;
|
|
673 $self->{'dnafrag'} = $dnafrag_adaptor->fetch_by_dbID($self->{'dnafrag_id'});
|
|
674 } else {
|
|
675 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag".
|
|
676 " You either have to specify more information (see perldoc for".
|
|
677 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
678 }
|
|
679 }
|
|
680 return $self->{'dnafrag'};
|
|
681 }
|
|
682
|
|
683
|
|
684 =head2 dnafrag_id
|
|
685
|
|
686 Arg [1] : integer $dnafrag_id
|
|
687 Example : $dnafrag_id = $genomic_align->dnafrag_id;
|
|
688 Example : $genomic_align->dnafrag_id(134);
|
|
689 Description: Getter/Setter for the attribute dnafrag_id. If no
|
|
690 argument is given and the dnafrag_id is not defined, it tries to
|
|
691 get the ID from other sources like the corresponding
|
|
692 Bio::EnsEMBL::Compara::DnaFrag object or the database using the dbID
|
|
693 of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
694 Use 0 as argument to clear this attribute.
|
|
695 Returntype : integer
|
|
696 Exceptions : thrown if $dnafrag_id does not match a previously defined
|
|
697 dnafrag
|
|
698 Warning : warns if getting data from other sources fails.
|
|
699 Caller : object->methodname
|
|
700 Status : Stable
|
|
701
|
|
702 =cut
|
|
703
|
|
704 sub dnafrag_id {
|
|
705 my ($self, $dnafrag_id) = @_;
|
|
706
|
|
707 if (defined($dnafrag_id)) {
|
|
708 $self->{'dnafrag_id'} = $dnafrag_id;
|
|
709 if (defined($self->{'dnafrag'}) and $self->{'dnafrag_id'}) {
|
|
710 # warning("Defining both dnafrag_id and dnafrag");
|
|
711 throw("dnafrag_id does not match previously defined dnafrag object")
|
|
712 if ($self->{'dnafrag'} and $self->{'dnafrag'}->dbID != $self->{'dnafrag_id'});
|
|
713 }
|
|
714
|
|
715 } elsif (!($self->{'dnafrag_id'})) {
|
|
716 # Try to get the ID from other sources...
|
|
717 if (defined($self->{'dnafrag'}) and defined($self->{'dnafrag'}->dbID)) {
|
|
718 # ...from the corresponding Bio::EnsEMBL::Compara::DnaFrag object
|
|
719 $self->{'dnafrag_id'} = $self->{'dnafrag'}->dbID;
|
|
720 } elsif (defined($self->{'adaptor'}) and defined($self->{'dbID'})) {
|
|
721 # ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
722 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
723 } else {
|
|
724 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_id".
|
|
725 " You either have to specify more information (see perldoc for".
|
|
726 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
727 }
|
|
728 }
|
|
729
|
|
730 return $self->{'dnafrag_id'};
|
|
731 }
|
|
732
|
|
733
|
|
734 =head2 dnafrag_start
|
|
735
|
|
736 Arg [1] : integer $dnafrag_start
|
|
737 Example : $dnafrag_start = $genomic_align->dnafrag_start;
|
|
738 Example : $genomic_align->dnafrag_start(1233354);
|
|
739 Description: Getter/Setter for the attribute dnafrag_start. If no argument is given, the
|
|
740 dnafrag_start is not defined but both the dbID and the adaptor are, it tries
|
|
741 to fetch and set all the direct attributes from the database using the
|
|
742 dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
743 Returntype : integer
|
|
744 Exceptions : none
|
|
745 Warning : warns if getting data from other sources fails.
|
|
746 Caller : object->methodname
|
|
747 Status : Stable
|
|
748
|
|
749 =cut
|
|
750
|
|
751 sub dnafrag_start {
|
|
752 my ($self, $dnafrag_start) = @_;
|
|
753
|
|
754 if (defined($dnafrag_start)) {
|
|
755 $self->{'dnafrag_start'} = $dnafrag_start;
|
|
756
|
|
757 } elsif (!defined($self->{'dnafrag_start'})) {
|
|
758 if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
759 # Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
760 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
761 } else {
|
|
762 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_start".
|
|
763 " You either have to specify more information (see perldoc for".
|
|
764 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
765 }
|
|
766 }
|
|
767
|
|
768 return $self->{'dnafrag_start'};
|
|
769 }
|
|
770
|
|
771
|
|
772 =head2 dnafrag_end
|
|
773
|
|
774 Arg [1] : integer $dnafrag_end
|
|
775 Example : $dnafrag_end = $genomic_align->dnafrag_end;
|
|
776 Example : $genomic_align->dnafrag_end(1235320);
|
|
777 Description: Getter/Setter for the attribute dnafrag_end. If no argument is given, the
|
|
778 dnafrag_end is not defined but both the dbID and the adaptor are, it tries
|
|
779 to fetch and set all the direct attributes from the database using the
|
|
780 dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
781 Returntype : integer
|
|
782 Exceptions : none
|
|
783 Warning : warns if getting data from other sources fails.
|
|
784 Caller : object->methodname
|
|
785 Status : Stable
|
|
786
|
|
787 =cut
|
|
788
|
|
789 sub dnafrag_end {
|
|
790 my ($self, $dnafrag_end) = @_;
|
|
791
|
|
792 if (defined($dnafrag_end)) {
|
|
793 $self->{'dnafrag_end'} = $dnafrag_end;
|
|
794
|
|
795 } elsif (!defined($self->{'dnafrag_end'})) {
|
|
796 if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
797 # Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
798 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
799 } else {
|
|
800 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_end".
|
|
801 " You either have to specify more information (see perldoc for".
|
|
802 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
803 }
|
|
804 }
|
|
805
|
|
806 return $self->{'dnafrag_end'};
|
|
807 }
|
|
808
|
|
809
|
|
810 =head2 dnafrag_strand
|
|
811
|
|
812 Arg [1] : integer $dnafrag_strand (1 or -1)
|
|
813 Example : $dnafrag_strand = $genomic_align->dnafrag_strand;
|
|
814 Example : $genomic_align->dnafrag_strand(1);
|
|
815 Description: Getter/Setter for the attribute dnafrag_strand. If no argument is given, the
|
|
816 dnafrag_strand is not defined but both the dbID and the adaptor are, it tries
|
|
817 to fetch and set all the direct attributes from the database using the
|
|
818 dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
819 Returntype : integer
|
|
820 Exceptions : none
|
|
821 Warning : warns if getting data from other sources fails.
|
|
822 Caller : object->methodname
|
|
823 Status : Stable
|
|
824
|
|
825 =cut
|
|
826
|
|
827 sub dnafrag_strand {
|
|
828 my ($self, $dnafrag_strand) = @_;
|
|
829
|
|
830 if (defined($dnafrag_strand)) {
|
|
831 $self->{'dnafrag_strand'} = $dnafrag_strand;
|
|
832
|
|
833 } elsif (!defined($self->{'dnafrag_strand'})) {
|
|
834 if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
835 # Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
836 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
837 } else {
|
|
838 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_strand".
|
|
839 " You either have to specify more information (see perldoc for".
|
|
840 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
841 }
|
|
842 }
|
|
843
|
|
844 return $self->{'dnafrag_strand'};
|
|
845 }
|
|
846
|
|
847
|
|
848 =head2 aligned_sequence
|
|
849
|
|
850 Arg [1...] : string $aligned_sequence or string @flags
|
|
851 Example : $aligned_sequence = $genomic_align->aligned_sequence
|
|
852 Example : $aligned_sequence = $genomic_align->aligned_sequence("+FIX_SEQ");
|
|
853 Example : $genomic_align->aligned_sequence("ACTAGTTAGCT---TATCT--TTAAA")
|
|
854 Description: With no arguments, rebuilds the alignment string for this sequence
|
|
855 using the cigar_line information and the original sequence if needed.
|
|
856 This sequence depends on the strand defined by the dnafrag_strand attribute.
|
|
857 Flags : +FIX_SEQ
|
|
858 With this flag, the method will return a sequence that could be
|
|
859 directly aligned with the original_sequence of the reference
|
|
860 genomic_align.
|
|
861 Returntype : string $aligned_sequence
|
|
862 Exceptions : thrown if sequence contains unknown symbols
|
|
863 Warning : warns if getting data from other sources fails.
|
|
864 Caller : object->methodname
|
|
865 Status : Stable
|
|
866
|
|
867 =cut
|
|
868
|
|
869 sub aligned_sequence {
|
|
870 my ($self, @aligned_sequence_or_flags) = @_;
|
|
871 my $aligned_sequence;
|
|
872
|
|
873 my $fix_seq = 0;
|
|
874 my $fake_seq = 0;
|
|
875 foreach my $flag (@aligned_sequence_or_flags) {
|
|
876 if ($flag =~ /^\+/) {
|
|
877 if ($flag eq "+FIX_SEQ") {
|
|
878 $fix_seq = 1;
|
|
879 } elsif ($flag eq "+FAKE_SEQ") {
|
|
880 $fake_seq = 1;
|
|
881 } else {
|
|
882 warning("Unknow flag $flag when calling".
|
|
883 " Bio::EnsEMBL::Compara::GenomicAlign::aligned_sequence()");
|
|
884 }
|
|
885 } else {
|
|
886 $aligned_sequence = $flag;
|
|
887 }
|
|
888 }
|
|
889
|
|
890 if (defined($aligned_sequence)) {
|
|
891 $aligned_sequence =~ s/[\r\n]+$//;
|
|
892
|
|
893 if ($aligned_sequence) {
|
|
894 ## Check sequence
|
|
895 throw("Unreadable sequence ($aligned_sequence)") if ($aligned_sequence !~ /^[\-\.A-Z]+$/i);
|
|
896 $self->{'aligned_sequence'} = $aligned_sequence;
|
|
897 } else {
|
|
898 $self->{'aligned_sequence'} = undef;
|
|
899 }
|
|
900 } elsif (!defined($self->{'aligned_sequence'})) {
|
|
901 # Try to get the aligned_sequence from other sources...
|
|
902 if (defined($self->cigar_line) and $fake_seq) {
|
|
903 # ...from the corresponding cigar_line (using a fake seq)
|
|
904 $aligned_sequence = _get_fake_aligned_sequence_from_cigar_line(
|
|
905 $self->{'cigar_line'});
|
|
906
|
|
907 } elsif (defined($self->cigar_line) and defined($self->original_sequence)) {
|
|
908 my $original_sequence = $self->original_sequence;
|
|
909 # ...from the corresponding orginial_sequence and cigar_line
|
|
910 $aligned_sequence = _get_aligned_sequence_from_original_sequence_and_cigar_line(
|
|
911 $original_sequence, $self->{'cigar_line'});
|
|
912 $self->{'aligned_sequence'} = $aligned_sequence;
|
|
913
|
|
914 } else {
|
|
915 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->aligned_sequence".
|
|
916 " You either have to specify more information (see perldoc for".
|
|
917 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
918 }
|
|
919 }
|
|
920
|
|
921 $aligned_sequence = $self->{'aligned_sequence'} if (defined($self->{'aligned_sequence'}));
|
|
922 if ($aligned_sequence and $fix_seq) {
|
|
923 $aligned_sequence = _get_aligned_sequence_from_original_sequence_and_cigar_line(
|
|
924 $aligned_sequence, $self->genomic_align_block->reference_genomic_align->cigar_line, $fix_seq);
|
|
925 }
|
|
926
|
|
927 return $aligned_sequence;
|
|
928 }
|
|
929
|
|
930
|
|
931 =head2 length
|
|
932
|
|
933 Arg [1] : -none-
|
|
934 Example : $length = $genomic_align->length;
|
|
935 Description: get the length of the aligned sequence. This method will try to
|
|
936 get the length from the aligned_sequence if already set or by
|
|
937 parsing the cigar_line otherwise
|
|
938 Returntype : int
|
|
939 Exceptions : none
|
|
940 Warning :
|
|
941 Caller : object->methodname
|
|
942 Status : Stable
|
|
943
|
|
944 =cut
|
|
945
|
|
946 sub length {
|
|
947 my $self = shift;
|
|
948
|
|
949 if ($self->{aligned_sequence}) {
|
|
950 return length($self->{aligned_sequence});
|
|
951 } elsif ($self->{cigar_line}) {
|
|
952 my $length = 0;
|
|
953 my $cigar_line = $self->{cigar_line};
|
|
954 my @cig = ( $cigar_line =~ /(\d*[GMDXI])/g );
|
|
955 for my $cigElem ( @cig ) {
|
|
956 my $cigType = substr( $cigElem, -1, 1);
|
|
957 my $cigCount = substr( $cigElem, 0 , -1);
|
|
958 $cigCount = 1 if ($cigCount eq "");
|
|
959 $length += $cigCount unless ($cigType eq "I");
|
|
960 }
|
|
961 return $length;
|
|
962 }
|
|
963
|
|
964 return undef;
|
|
965 }
|
|
966
|
|
967 =head2 cigar_line
|
|
968
|
|
969 Arg [1] : string $cigar_line
|
|
970 Example : $cigar_line = $genomic_align->cigar_line;
|
|
971 Example : $genomic_align->cigar_line("35M2D233M7D23MD100M");
|
|
972 Description: get/set for attribute cigar_line.
|
|
973 If no argument is given, the cigar line has not been
|
|
974 defined yet but the aligned sequence was, it calculates
|
|
975 the cigar line based on the aligned (gapped) sequence.
|
|
976 If no argument is given, the cigar_line is not defined but both
|
|
977 the dbID and the adaptor are, it tries to fetch and set all
|
|
978 the direct attributes from the database using the dbID of the
|
|
979 Bio::EnsEMBL::Compara::GenomicAlign object. You can reset this
|
|
980 attribute using an empty string as argument.
|
|
981 The cigar_line depends on the strand defined by the dnafrag_strand
|
|
982 attribute.
|
|
983 Returntype : string
|
|
984 Exceptions : none
|
|
985 Warning : warns if getting data from other sources fails.
|
|
986 Caller : object->methodname
|
|
987 Status : Stable
|
|
988
|
|
989 =cut
|
|
990
|
|
991 sub cigar_line {
|
|
992 my ($self, $arg) = @_;
|
|
993
|
|
994 if (defined($arg)) {
|
|
995 if ($arg) {
|
|
996 $self->{'cigar_line'} = $arg;
|
|
997 } else {
|
|
998 $self->{'cigar_line'} = undef;
|
|
999 }
|
|
1000
|
|
1001 } elsif (!defined($self->{'cigar_line'})) {
|
|
1002 # Try to get the cigar_line from other sources...
|
|
1003 if (defined($self->{'aligned_sequence'})) {
|
|
1004 # ...from the aligned sequence
|
|
1005
|
|
1006
|
|
1007 my $cigar_line = _get_cigar_line_from_aligned_sequence($self->{'aligned_sequence'});
|
|
1008 $self->cigar_line($cigar_line);
|
|
1009
|
|
1010 } elsif (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
1011 # ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
1012 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
1013 } else {
|
|
1014 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->cigar_line".
|
|
1015 " You either have to specify more information (see perldoc for".
|
|
1016 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
1017 }
|
|
1018 }
|
|
1019
|
|
1020 return $self->{'cigar_line'};
|
|
1021 }
|
|
1022
|
|
1023
|
|
1024 =head2 visible
|
|
1025
|
|
1026 Arg [1] : int $visible
|
|
1027 Example : $visible = $genomic_align->visible
|
|
1028 Example : $genomic_align->visible(1);
|
|
1029 Description: get/set for attribute visible. If no argument is given, visible
|
|
1030 is not defined but both the dbID and the adaptor are, it tries to
|
|
1031 fetch and set all the direct attributes from the database using the
|
|
1032 dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
1033 Returntype : int
|
|
1034 Exceptions : none
|
|
1035 Warning : warns if getting data from other sources fails.
|
|
1036 Caller : object->methodname
|
|
1037 Status : Stable
|
|
1038
|
|
1039 =cut
|
|
1040
|
|
1041 sub visible {
|
|
1042 my ($self, $visible) = @_;
|
|
1043
|
|
1044 if (defined($visible)) {
|
|
1045 $self->{'visible'} = $visible;
|
|
1046
|
|
1047 } elsif (!defined($self->{'visible'})) {
|
|
1048 if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
1049 # Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
1050 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
1051 } else {
|
|
1052 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->visible".
|
|
1053 " You either have to specify more information (see perldoc for".
|
|
1054 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
1055 }
|
|
1056 }
|
|
1057
|
|
1058 return $self->{'visible'};
|
|
1059 }
|
|
1060
|
|
1061 =head2 node_id
|
|
1062
|
|
1063 Arg [1] : [optional] int $node_id
|
|
1064 Example : $node_id = $genomic_align->node_id;
|
|
1065 Example : $genomic_align->node_id(5530000000004);
|
|
1066 Description: get/set for the node_id.This links the Bio::EnsEMBL::Compara::GenomicAlign to the
|
|
1067 Bio::EnsEMBL::Compara::GenomicAlignTree. The default value is NULL. If no argument is given, the node_id
|
|
1068 is not defined but both the dbID and the adaptor are, it tries to
|
|
1069 fetch and set all the direct attributes from the database using the
|
|
1070 dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
|
|
1071 Returntype : int
|
|
1072 Exceptions : none
|
|
1073 Warning : warns if getting data from other sources fails.
|
|
1074 Caller : object->methodname
|
|
1075 Status : At risk
|
|
1076
|
|
1077 =cut
|
|
1078
|
|
1079 sub node_id {
|
|
1080 my ($self, $node_id) = @_;
|
|
1081
|
|
1082 if (defined($node_id)) {
|
|
1083 $self->{'node_id'} = $node_id;
|
|
1084 } elsif (!defined($self->{'node_id'})) {
|
|
1085 if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
|
|
1086 # Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
|
|
1087 $self->adaptor->retrieve_all_direct_attributes($self);
|
|
1088 }
|
|
1089 }
|
|
1090 return $self->{'node_id'};
|
|
1091 }
|
|
1092
|
|
1093 =head2 genomic_align_group
|
|
1094
|
|
1095 Arg [2] : [optional] Bio::EnsEMBL::Compara::GenomicAlignGroup $genomic_align_group
|
|
1096 Example : $genomic_align_group = $genomic_align->genomic_align_group();
|
|
1097 Example : $genomic_align->genomic_align_group($genomic_align_group);
|
|
1098 Description: get/set for the Bio::EnsEMBL::Compara::GenomicAlginGroup object
|
|
1099 corresponding to this Bio::EnsEMBL::Compara::GenomicAlign object
|
|
1100 Returntype : int
|
|
1101 Exceptions : none
|
|
1102 Warning : warns if getting data from other sources fails.
|
|
1103 Caller : object->methodname
|
|
1104 Status : Stable
|
|
1105
|
|
1106 =cut
|
|
1107
|
|
1108 sub genomic_align_group {
|
|
1109 my ($self, $genomic_align_group) = @_;
|
|
1110
|
|
1111 deprecate("Removed genomic_align_group table");
|
|
1112
|
|
1113 }
|
|
1114
|
|
1115
|
|
1116 =head2 genomic_align_group_id
|
|
1117
|
|
1118 Arg [2] : [optional] int $genomic_align_group_id
|
|
1119 Example : $genomic_align_group_id = $genomic_align->genomic_align_group_id();
|
|
1120 Example : $genomic_align->genomic_align_group_id(18);
|
|
1121 Description: get/set for the genomic_align_group_id corresponding to this
|
|
1122 Bio::EnsEMBL::Compara::GenomicAlign object
|
|
1123 Returntype : int
|
|
1124 Exceptions : none
|
|
1125 Warning : warns if getting data from other sources fails.
|
|
1126 Caller : object->methodname
|
|
1127 Status : Stable
|
|
1128
|
|
1129 =cut
|
|
1130
|
|
1131 sub genomic_align_group_id {
|
|
1132 my ($self, $genomic_align_group_id) = @_;
|
|
1133
|
|
1134 deprecate("Removed genomic_align_group table");
|
|
1135 }
|
|
1136
|
|
1137
|
|
1138 =head2 original_sequence
|
|
1139
|
|
1140 Arg [1] : none
|
|
1141 Example : $original_sequence = $genomic_align->original_sequence
|
|
1142 Description: get/set original sequence. If no argument is given and the original_sequence
|
|
1143 is not defined, it tries to fetch the data from other sources like the
|
|
1144 aligned sequence or the the Bio::EnsEMBL::Compara:DnaFrag object. You can
|
|
1145 reset this attribute using an empty string as argument.
|
|
1146 This sequence depends on the strand defined by the dnafrag_strand attribute.
|
|
1147 Returntype : string $original_sequence
|
|
1148 Exceptions :
|
|
1149 Caller : object->methodname
|
|
1150 Status : Stable
|
|
1151
|
|
1152 =cut
|
|
1153
|
|
1154 sub original_sequence {
|
|
1155 my ($self, $original_sequence) = @_;
|
|
1156
|
|
1157 if (defined($original_sequence)) {
|
|
1158 if ($original_sequence) {
|
|
1159 $self->{'original_sequence'} = $original_sequence;
|
|
1160 } else {
|
|
1161 $self->{'original_sequence'} = undef;
|
|
1162 }
|
|
1163
|
|
1164 } elsif (!defined($self->{'original_sequence'})) {
|
|
1165 # Try to get the data from other sources...
|
|
1166 if ($self->{'aligned_sequence'} and $self->{'cigar_line'} !~ /I/) {
|
|
1167 # ...from the aligned sequence
|
|
1168 $self->{'original_sequence'} = $self->{'aligned_sequence'};
|
|
1169 $self->{'original_sequence'} =~ s/\-//g;
|
|
1170
|
|
1171 } elsif (!defined($self->{'original_sequence'}) and defined($self->dnafrag)
|
|
1172 and defined($self->dnafrag_start) and defined($self->dnafrag_end)
|
|
1173 and defined($self->dnafrag_strand) and defined($self->dnafrag->slice)) {
|
|
1174 # ...from the dnafrag object. Uses dnafrag, dnafrag_start and dnafrag_methods instead of the attibutes
|
|
1175 # in the <if> clause because the attributes can be retrieved from other sources if they have not been
|
|
1176 # already defined.
|
|
1177 $self->{'original_sequence'} = $self->dnafrag->slice->subseq(
|
|
1178 $self->dnafrag_start,
|
|
1179 $self->dnafrag_end,
|
|
1180 $self->dnafrag_strand
|
|
1181 );
|
|
1182 } else {
|
|
1183 warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_groups".
|
|
1184 " You either have to specify more information (see perldoc for".
|
|
1185 " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
|
|
1186 }
|
|
1187 }
|
|
1188
|
|
1189 return $self->{'original_sequence'};
|
|
1190 }
|
|
1191
|
|
1192 =head2 _get_cigar_line_from_aligned_sequence
|
|
1193
|
|
1194 Arg [1] : string $aligned_sequence
|
|
1195 Example : $cigar_line = _get_cigar_line_from_aligned_sequence("CGT-AACTGATG--TTA")
|
|
1196 Description: get cigar line from gapped sequence
|
|
1197 Returntype : string $cigar_line
|
|
1198 Exceptions :
|
|
1199 Caller : methodname
|
|
1200 Status : Stable
|
|
1201
|
|
1202 =cut
|
|
1203
|
|
1204 sub _get_cigar_line_from_aligned_sequence {
|
|
1205 my ($aligned_sequence) = @_;
|
|
1206 my $cigar_line = "";
|
|
1207
|
|
1208 my @pieces = grep {$_} split(/(\-+)|(\.+)/, $aligned_sequence);
|
|
1209 foreach my $piece (@pieces) {
|
|
1210 my $mode;
|
|
1211 if ($piece =~ /\-/) {
|
|
1212 $mode = "D"; # D for gaps (deletions)
|
|
1213 } elsif ($piece =~ /\./) {
|
|
1214 $mode = "X"; # X for pads (in 2X genomes)
|
|
1215 } else {
|
|
1216 $mode = "M"; # M for matches/mismatches
|
|
1217 }
|
|
1218 if (CORE::length($piece) == 1) {
|
|
1219 $cigar_line .= $mode;
|
|
1220 } elsif (CORE::length($piece) > 1) { #length can be 0 if the sequence starts with a gap
|
|
1221 $cigar_line .= CORE::length($piece).$mode;
|
|
1222 }
|
|
1223 }
|
|
1224
|
|
1225 return $cigar_line;
|
|
1226 }
|
|
1227
|
|
1228
|
|
1229 =head2 _get_aligned_sequence_from_original_sequence_and_cigar_line
|
|
1230
|
|
1231 Arg [1] : string $original_sequence
|
|
1232 Arg [1] : string $cigar_line
|
|
1233 Example : $aligned_sequence = _get_aligned_sequence_from_original_sequence_and_cigar_line(
|
|
1234 "CGTAACTGATGTTA", "3MD8M2D3M")
|
|
1235 Description: get gapped sequence from original one and cigar line
|
|
1236 Returntype : string $aligned_sequence
|
|
1237 Exceptions : thrown if cigar_line does not match sequence length
|
|
1238 Caller : methodname
|
|
1239 Status : Stable
|
|
1240
|
|
1241 =cut
|
|
1242
|
|
1243 sub _get_aligned_sequence_from_original_sequence_and_cigar_line {
|
|
1244 my ($original_sequence, $cigar_line, $fix_seq) = @_;
|
|
1245 my $aligned_sequence = "";
|
|
1246
|
|
1247 return undef if (!defined($original_sequence) or !$cigar_line);
|
|
1248
|
|
1249 my $seq_pos = 0;
|
|
1250 my @cig = ( $cigar_line =~ /(\d*[GMDXI])/g );
|
|
1251
|
|
1252 for my $cigElem ( @cig ) {
|
|
1253 my $cigType = substr( $cigElem, -1, 1 );
|
|
1254 my $cigCount = substr( $cigElem, 0 ,-1 );
|
|
1255 $cigCount = 1 unless ($cigCount =~ /^\d+$/);
|
|
1256
|
|
1257 if( $cigType eq "M" ) {
|
|
1258 $aligned_sequence .= substr($original_sequence, $seq_pos, $cigCount);
|
|
1259 $seq_pos += $cigCount;
|
|
1260 } elsif( $cigType eq "I") {
|
|
1261 $seq_pos += $cigCount;
|
|
1262 } elsif( $cigType eq "X") {
|
|
1263 $aligned_sequence .= "." x $cigCount;
|
|
1264 } elsif( $cigType eq "G" || $cigType eq "D") {
|
|
1265 if ($fix_seq) {
|
|
1266 $seq_pos += $cigCount;
|
|
1267 } else {
|
|
1268 $aligned_sequence .= "-" x $cigCount;
|
|
1269 }
|
|
1270 }
|
|
1271 }
|
|
1272 throw("Cigar line ($seq_pos) does not match sequence length (".CORE::length($original_sequence).")")
|
|
1273 if ($seq_pos != CORE::length($original_sequence));
|
|
1274
|
|
1275 return $aligned_sequence;
|
|
1276 }
|
|
1277
|
|
1278
|
|
1279 =head2 _get_fake_aligned_sequence_from_cigar_line
|
|
1280
|
|
1281 Arg [1] : string $cigar_line
|
|
1282 Example : $aligned_sequence = _get_fake_aligned_sequence_from_cigar_line(
|
|
1283 "3MD8M2D3M")
|
|
1284 Description: get gapped sequence of N\'s from the cigar line
|
|
1285 Returntype : string $fake_aligned_sequence or undef if no $cigar_line
|
|
1286 Exceptions :
|
|
1287 Caller : methodname
|
|
1288 Status : Stable
|
|
1289
|
|
1290 =cut
|
|
1291
|
|
1292 sub _get_fake_aligned_sequence_from_cigar_line {
|
|
1293 my ($cigar_line, $fix_seq) = @_;
|
|
1294 my $fake_aligned_sequence = "";
|
|
1295
|
|
1296 return undef if (!$cigar_line);
|
|
1297
|
|
1298 my $seq_pos = 0;
|
|
1299
|
|
1300 my @cig = ( $cigar_line =~ /(\d*[GMDXI])/g );
|
|
1301 for my $cigElem ( @cig ) {
|
|
1302 my $cigType = substr( $cigElem, -1, 1 );
|
|
1303 my $cigCount = substr( $cigElem, 0 ,-1 );
|
|
1304 $cigCount = 1 if ($cigCount eq "");
|
|
1305
|
|
1306 if( $cigType eq "M" ) {
|
|
1307 $fake_aligned_sequence .= "N" x $cigCount;
|
|
1308 $seq_pos += $cigCount;
|
|
1309 } elsif( $cigType eq "I") {
|
|
1310 $seq_pos += $cigCount;
|
|
1311 } elsif( $cigType eq "X") {
|
|
1312 $fake_aligned_sequence .= "." x $cigCount;
|
|
1313 } elsif( $cigType eq "G" || $cigType eq "D") {
|
|
1314 if ($fix_seq) {
|
|
1315 $seq_pos += $cigCount;
|
|
1316 } else {
|
|
1317 $fake_aligned_sequence .= "-" x $cigCount;
|
|
1318 }
|
|
1319 }
|
|
1320 }
|
|
1321
|
|
1322 return $fake_aligned_sequence;
|
|
1323 }
|
|
1324
|
|
1325
|
|
1326 =head2 _print
|
|
1327
|
|
1328 Arg [1] : ref to a FILEHANDLE
|
|
1329 Example : $genomic_align->_print
|
|
1330 Description: print attributes of the object to the STDOUT or to the FILEHANDLE.
|
|
1331 Used for debuging purposes.
|
|
1332 Returntype : none
|
|
1333 Exceptions :
|
|
1334 Caller : object->methodname
|
|
1335 Status : At risk
|
|
1336
|
|
1337 =cut
|
|
1338
|
|
1339 sub _print {
|
|
1340 my ($self, $FILEH) = @_;
|
|
1341
|
|
1342 my $verbose = verbose;
|
|
1343 verbose(0);
|
|
1344
|
|
1345 $FILEH ||= \*STDOUT;
|
|
1346
|
|
1347 print $FILEH
|
|
1348 "Bio::EnsEMBL::Compara::GenomicAlign object ($self)
|
|
1349 dbID = ".($self->dbID or "-undef-")."
|
|
1350 adaptor = ".($self->adaptor or "-undef-")."
|
|
1351 genomic_align_block = ".($self->genomic_align_block or "-undef-")."
|
|
1352 genomic_align_block_id = ".($self->genomic_align_block_id or "-undef-")."
|
|
1353 method_link_species_set = ".($self->method_link_species_set or "-undef-")."
|
|
1354 method_link_species_set_id = ".($self->method_link_species_set_id or "-undef-")."
|
|
1355 dnafrag = ".($self->dnafrag or "-undef-")."
|
|
1356 dnafrag_id = ".($self->dnafrag_id or "-undef-")."
|
|
1357 dnafrag_start = ".($self->dnafrag_start or "-undef-")."
|
|
1358 dnafrag_end = ".($self->dnafrag_end or "-undef-")."
|
|
1359 dnafrag_strand = ".($self->dnafrag_strand or "-undef-")."
|
|
1360 cigar_line = ".($self->cigar_line or "-undef-")."
|
|
1361 visible = ".($self->visible or "-undef-")."
|
|
1362 original_sequence = ".($self->original_sequence or "-undef-")."
|
|
1363 aligned_sequence = ".($self->aligned_sequence or "-undef-")."
|
|
1364
|
|
1365 ";
|
|
1366 verbose($verbose);
|
|
1367
|
|
1368 }
|
|
1369
|
|
1370 =head2 display_id
|
|
1371
|
|
1372 Args : none
|
|
1373 Example : my $id = $genomic_align->display_id;
|
|
1374 Description: returns string describing this genomic_align which can be used
|
|
1375 as display_id of a Bio::Seq object or in a fasta file. The actual form is
|
|
1376 taxon_id:genome_db_id:coord_system_name:dnafrag_name:dnafrag_start:dnafrag_end:dnafrag_strand
|
|
1377 e.g.
|
|
1378 9606:1:chromosome:14:50000000:51000000:-1
|
|
1379
|
|
1380 Uses dnafrag information in addition to start and end.
|
|
1381 Returntype : string
|
|
1382 Exceptions : none
|
|
1383 Caller : general
|
|
1384 Status : Stable
|
|
1385
|
|
1386 =cut
|
|
1387
|
|
1388 sub display_id {
|
|
1389 my $self = shift;
|
|
1390
|
|
1391 my $dnafrag = $self->dnafrag;
|
|
1392 return "" unless($dnafrag);
|
|
1393 my $id = join(':',
|
|
1394 $dnafrag->genome_db->taxon_id,
|
|
1395 $dnafrag->genome_db->dbID,
|
|
1396 $dnafrag->coord_system_name,
|
|
1397 $dnafrag->name,
|
|
1398 $self->dnafrag_start,
|
|
1399 $self->dnafrag_end,
|
|
1400 $self->dnafrag_strand);
|
|
1401 return $id;
|
|
1402 }
|
|
1403
|
|
1404 =head2 reverse_complement
|
|
1405
|
|
1406 Args : none
|
|
1407 Example : none
|
|
1408 Description: reverse complement the object modifing dnafrag_strand and cigar_line
|
|
1409 Returntype : none
|
|
1410 Exceptions : none
|
|
1411 Caller : general
|
|
1412 Status : Stable
|
|
1413
|
|
1414 =cut
|
|
1415
|
|
1416 sub reverse_complement {
|
|
1417 my ($self) = @_;
|
|
1418
|
|
1419 # reverse strand
|
|
1420 #$self->dnafrag_strand($self->dnafrag_strand * -1);
|
|
1421 $self->dnafrag_strand($self->{'dnafrag_strand'} * -1);
|
|
1422
|
|
1423 # reverse original and aligned sequences if cached
|
|
1424 my $original_sequence = $self->{'original_sequence'};
|
|
1425 if ($original_sequence) {
|
|
1426 $original_sequence = reverse $original_sequence;
|
|
1427 $original_sequence =~ tr/ATCGatcg/TAGCtagc/;
|
|
1428 $self->original_sequence($original_sequence);
|
|
1429 }
|
|
1430 my $aligned_sequence = $self->{'aligned_sequence'};
|
|
1431 if ($aligned_sequence) {
|
|
1432 $aligned_sequence = reverse $aligned_sequence;
|
|
1433 $aligned_sequence =~ tr/ATCGatcg/TAGCtagc/;
|
|
1434 $self->aligned_sequence($aligned_sequence);
|
|
1435 }
|
|
1436
|
|
1437 # reverse cigar_string as consequence
|
|
1438 my $cigar_line = $self->{'cigar_line'};
|
|
1439
|
|
1440 #$cigar_line = join("", reverse grep {$_} split(/(\d*[GDMIX])/, $cigar_line));
|
|
1441 $cigar_line = join("", reverse ($cigar_line=~(/(\d*[GDMIX])/g)));
|
|
1442 $self->cigar_line($cigar_line);
|
|
1443 }
|
|
1444
|
|
1445
|
|
1446 =head2 get_Mapper
|
|
1447
|
|
1448 Arg[1] : [optional] integer $cache (default = FALSE)
|
|
1449 Arg[2] : [optional] boolean $condensed (default = FALSE)
|
|
1450 Example : $this_mapper = $genomic_align->get_Mapper();
|
|
1451 Example : $mapper1 = $genomic_align1->get_Mapper();
|
|
1452 $mapper2 = $genomic_align2->get_Mapper();
|
|
1453 Description: creates and returns a Bio::EnsEMBL::Mapper to map coordinates from
|
|
1454 the original sequence of this Bio::EnsEMBL::Compara::GenomicAlign
|
|
1455 to the aligned sequence, i.e. the alignment. In order to map a sequence
|
|
1456 from this Bio::EnsEMBL::Compara::GenomicAlign object to another
|
|
1457 Bio::EnsEMBL::Compara::GenomicAlign of the same
|
|
1458 Bio::EnsEMBL::Compara::GenomicAlignBlock object, you may use this mapper
|
|
1459 to transform coordinates into the "alignment" coordinates and then to
|
|
1460 the other Bio::EnsEMBL::Compara::GenomicAlign coordinates using the
|
|
1461 corresponding Bio::EnsEMBL::Mapper.
|
|
1462 The coordinates of the "alignment" starts with the start
|
|
1463 position of the GenomicAlignBlock if available or 1 otherwise.
|
|
1464 With the $cache argument you can decide whether you want to cache the
|
|
1465 result or not. Result is *not* cached by default.
|
|
1466 Returntype : Bio::EnsEMBL::Mapper object
|
|
1467 Exceptions : throw if no cigar_line can be found
|
|
1468 Status : Stable
|
|
1469
|
|
1470 =cut
|
|
1471
|
|
1472 sub get_Mapper {
|
|
1473 my ($self, $cache, $condensed) = @_;
|
|
1474 my $mapper;
|
|
1475 $cache = 0 if (!defined($cache));
|
|
1476 my $mode = "expanded";
|
|
1477 if (defined($condensed) and $condensed) {
|
|
1478 $mode = "condensed";
|
|
1479 }
|
|
1480
|
|
1481 if (!defined($self->{$mode.'_mapper'})) {
|
|
1482 if ($mode eq "condensed") {
|
|
1483
|
|
1484 $mapper = Bio::EnsEMBL::Mapper->new("sequence", "alignment");
|
|
1485
|
|
1486 my $rel_strand = $self->dnafrag_strand; # This call ensures all direct attribs have been fetched
|
|
1487 my $ref_cigar_line = $self->genomic_align_block->reference_genomic_align->cigar_line;
|
|
1488
|
|
1489 my $aln_pos = (eval{$self->genomic_align_block->start} or 1);
|
|
1490
|
|
1491 #if the reference genomic_align, I only need a simple 1 to 1 mapping
|
|
1492 if ($self eq $self->genomic_align_block->reference_genomic_align) {
|
|
1493 $mapper->add_map_coordinates(
|
|
1494 'sequence',
|
|
1495 $self->{'dnafrag_start'},
|
|
1496 $self->{'dnafrag_end'},
|
|
1497 $self->{'dnafrag_strand'},
|
|
1498 'alignment',
|
|
1499 $self->genomic_align_block->start,
|
|
1500 $self->genomic_align_block->end,
|
|
1501 );
|
|
1502 return $mapper if (!$cache);
|
|
1503
|
|
1504 $self->{$mode.'_mapper'} = $mapper;
|
|
1505 return $self->{$mode.'_mapper'};
|
|
1506 }
|
|
1507
|
|
1508 my $aln_seq_pos = 0;
|
|
1509 my $seq_pos = 0;
|
|
1510
|
|
1511 my $insertions = 0;
|
|
1512 my $target_cigar_pieces;
|
|
1513 @$target_cigar_pieces = $self->{'cigar_line'} =~ /(\d*[GMDXI])/g;
|
|
1514 my $ref_cigar_pieces;
|
|
1515 @$ref_cigar_pieces = $ref_cigar_line =~ /(\d*[GMDXI])/g;
|
|
1516 my $i = 0;
|
|
1517 my $j = 0;
|
|
1518 my ($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
|
|
1519 $ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
|
|
1520 my ($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
|
|
1521 $target_num = 1 if (!defined($target_num) or $target_num eq "");
|
|
1522
|
|
1523 while ($i < @$ref_cigar_pieces and $j<@$target_cigar_pieces) {
|
|
1524 while ($ref_type eq "I") {
|
|
1525 $aln_pos += $ref_num;
|
|
1526 $i++;
|
|
1527 last if ($i >= @$ref_cigar_pieces);
|
|
1528 ($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
|
|
1529 $ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
|
|
1530 }
|
|
1531 while ($target_type eq "I") {
|
|
1532 $seq_pos += $target_num;
|
|
1533 $j++;
|
|
1534 last if ($j >= @$target_cigar_pieces);
|
|
1535 ($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
|
|
1536 $target_num = 1 if (!defined($target_num) or $target_num eq "");
|
|
1537 }
|
|
1538
|
|
1539 my $length;
|
|
1540
|
|
1541 if ($ref_num == $target_num) {
|
|
1542 $length = $ref_num;
|
|
1543 } elsif ($ref_num > $target_num) {
|
|
1544 $length = $target_num;
|
|
1545 } elsif ($ref_num < $target_num) {
|
|
1546 $length = $ref_num;
|
|
1547 }
|
|
1548 my $this_piece_of_cigar_line = $length.$target_type;
|
|
1549
|
|
1550 if ($ref_type eq "M") {
|
|
1551 my $this_mapper;
|
|
1552 if ($rel_strand == 1) {
|
|
1553 _add_cigar_line_to_Mapper($this_piece_of_cigar_line, $aln_pos,
|
|
1554 $seq_pos + $self->dnafrag_start, 1, $mapper);
|
|
1555 } else {
|
|
1556 _add_cigar_line_to_Mapper($this_piece_of_cigar_line, $aln_pos, $self->dnafrag_end - $seq_pos, -1, $mapper);
|
|
1557 }
|
|
1558 $aln_pos += $length;
|
|
1559 }
|
|
1560 my $gaps = 0;
|
|
1561 if ($target_type eq "D" || $target_type eq "X") {
|
|
1562 $gaps += $length;
|
|
1563 }
|
|
1564
|
|
1565 $seq_pos -= $gaps;
|
|
1566 $seq_pos += $length;
|
|
1567
|
|
1568 if ($ref_num == $target_num) {
|
|
1569 $i++;
|
|
1570 $j++;
|
|
1571 last if ($i >= @$ref_cigar_pieces);
|
|
1572 last if ($j >= @$target_cigar_pieces);
|
|
1573 ($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
|
|
1574 $ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
|
|
1575 ($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
|
|
1576 $target_num = 1 if (!defined($target_num) or $target_num eq "");
|
|
1577 } elsif ($ref_num > $target_num) {
|
|
1578 $j++;
|
|
1579 $ref_num -= $target_num;
|
|
1580 last if ($j >= @$target_cigar_pieces);
|
|
1581 ($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
|
|
1582 $target_num = 1 if (!defined($target_num) or $target_num eq "");
|
|
1583 } elsif ($ref_num < $target_num) {
|
|
1584 $i++;
|
|
1585 $target_num -= $ref_num;
|
|
1586 last if ($i >= @$ref_cigar_pieces);
|
|
1587 ($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
|
|
1588 $ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
|
|
1589 }
|
|
1590 }
|
|
1591 } else {
|
|
1592 my $cigar_line = $self->cigar_line;
|
|
1593 if (!$cigar_line) {
|
|
1594 throw("[$self] has no cigar_line and cannot be retrieved by any means");
|
|
1595 }
|
|
1596 my $alignment_position = (eval{$self->genomic_align_block->start} or 1);
|
|
1597 my $sequence_position = $self->dnafrag_start;
|
|
1598 my $rel_strand = $self->dnafrag_strand;
|
|
1599 if ($rel_strand == 1) {
|
|
1600 $sequence_position = $self->dnafrag_start;
|
|
1601 } else {
|
|
1602 $sequence_position = $self->dnafrag_end;
|
|
1603 }
|
|
1604 $mapper = _get_Mapper_from_cigar_line($cigar_line, $alignment_position, $sequence_position, $rel_strand);
|
|
1605 }
|
|
1606
|
|
1607 return $mapper if (!$cache);
|
|
1608
|
|
1609 $self->{$mode.'_mapper'} = $mapper;
|
|
1610 }
|
|
1611
|
|
1612 return $self->{$mode.'_mapper'};
|
|
1613 }
|
|
1614
|
|
1615 sub get_MapperOLD {
|
|
1616 my ($self, $cache, $condensed) = @_;
|
|
1617 my $mapper;
|
|
1618 $cache = 0 if (!defined($cache));
|
|
1619 my $mode = "expanded";
|
|
1620 if (defined($condensed) and $condensed) {
|
|
1621 $mode = "condensed";
|
|
1622 }
|
|
1623
|
|
1624 if (!defined($self->{$mode.'_mapper'})) {
|
|
1625 if ($mode eq "condensed") {
|
|
1626 $mapper = Bio::EnsEMBL::Mapper->new("sequence", "alignment");
|
|
1627 my $rel_strand = $self->dnafrag_strand;
|
|
1628 my $ref_cigar_line = $self->genomic_align_block->reference_genomic_align->cigar_line;
|
|
1629 my $this_aligned_seq = $self->aligned_sequence("+FAKE_SEQ");
|
|
1630
|
|
1631 my $aln_pos = (eval{$self->genomic_align_block->start} or 1);
|
|
1632 my $aln_seq_pos = 0;
|
|
1633 my $seq_pos = 0;
|
|
1634
|
|
1635 my $target_cigar_pieces;
|
|
1636 @$target_cigar_pieces = $self->cigar_line =~ /(\d*[GMDXI])/g;
|
|
1637
|
|
1638 my $insertions = 0;
|
|
1639 my $array_index = 0;
|
|
1640 my $this_target_pos = 0;
|
|
1641 foreach my $cigar_piece ($ref_cigar_line =~ /(\d*[GMDX])/g) {
|
|
1642 my ($cig_count, $cig_mode) = $cigar_piece =~ /(\d*)([GMDX])/;
|
|
1643 $cig_count = 1 if (!defined($cig_count) or $cig_count eq "");
|
|
1644
|
|
1645 #because of 2X genomes, need different method for extracting the
|
|
1646 #cigar_line
|
|
1647 #my $this_piece_of_cigar_line = _get_sub_cigar_line_slow($target_cigar_pieces, $aln_seq_pos, $cig_count);
|
|
1648
|
|
1649 #quicker method which keeps track of how far through the
|
|
1650 #target_cigar_pieces array we are
|
|
1651 my $this_piece_of_cigar_line;
|
|
1652 ($this_piece_of_cigar_line,$array_index, $this_target_pos) = _get_sub_cigar_line($target_cigar_pieces, $aln_seq_pos, $cig_count, $array_index, $this_target_pos);
|
|
1653
|
|
1654 #find number of each cigar_line mode in cigar_line
|
|
1655 my $num_cigar_elements = _count_cigar_elements($this_piece_of_cigar_line);
|
|
1656 if ($cig_mode eq "M") {
|
|
1657
|
|
1658 my $this_mapper;
|
|
1659 if ($rel_strand == 1) {
|
|
1660 $this_mapper = _get_Mapper_from_cigar_line($this_piece_of_cigar_line, $aln_pos,
|
|
1661 $seq_pos + $self->dnafrag_start, 1);
|
|
1662 } else {
|
|
1663 $this_mapper = _get_Mapper_from_cigar_line($this_piece_of_cigar_line, $aln_pos,
|
|
1664 $self->dnafrag_end - $seq_pos, -1);
|
|
1665 }
|
|
1666 $mapper->add_Mapper($this_mapper);
|
|
1667 $aln_pos += $cig_count;
|
|
1668
|
|
1669 $insertions = $num_cigar_elements->{"I"};
|
|
1670
|
|
1671 $seq_pos += $insertions;
|
|
1672 }
|
|
1673 my $gaps = $num_cigar_elements->{"D"};
|
|
1674 $gaps += $num_cigar_elements->{"X"};
|
|
1675
|
|
1676 $seq_pos -= $gaps;
|
|
1677 $seq_pos += $cig_count;
|
|
1678 $aln_seq_pos += $cig_count;
|
|
1679 }
|
|
1680
|
|
1681 } else {
|
|
1682 my $cigar_line = $self->cigar_line;
|
|
1683 if (!$cigar_line) {
|
|
1684 throw("[$self] has no cigar_line and cannot be retrieved by any means");
|
|
1685 }
|
|
1686 my $alignment_position = (eval{$self->genomic_align_block->start} or 1);
|
|
1687 my $sequence_position = $self->dnafrag_start;
|
|
1688 my $rel_strand = $self->dnafrag_strand;
|
|
1689 if ($rel_strand == 1) {
|
|
1690 $sequence_position = $self->dnafrag_start;
|
|
1691 } else {
|
|
1692 $sequence_position = $self->dnafrag_end;
|
|
1693 }
|
|
1694 $mapper = _get_Mapper_from_cigar_line($cigar_line, $alignment_position, $sequence_position, $rel_strand);
|
|
1695 }
|
|
1696
|
|
1697 return $mapper if (!$cache);
|
|
1698
|
|
1699 $self->{$mode.'_mapper'} = $mapper;
|
|
1700 }
|
|
1701
|
|
1702 return $self->{$mode.'_mapper'};
|
|
1703 }
|
|
1704
|
|
1705 =head2 _count_cigar_elements
|
|
1706
|
|
1707 Arg[1] : string $cigar_line
|
|
1708 Example : $num_elements = _count_cigar_elements("5M3D2M5D")
|
|
1709 Description: Counts the number of each cigar_line mode in a cigar_line
|
|
1710 and stores them in a hash reference. In the above example
|
|
1711 $num_elements->{"M"} is 7, $num_elements->{"D"} is 8
|
|
1712 Returntype : hash reference
|
|
1713 Exceptions : None
|
|
1714 Status : At risk
|
|
1715
|
|
1716 =cut
|
|
1717 sub _count_cigar_elements {
|
|
1718 my ($cigar_line) = @_;
|
|
1719
|
|
1720 my $this_count = 0;
|
|
1721 my $num_elements;
|
|
1722
|
|
1723 #initialise each element to 0
|
|
1724 foreach my $mode (qw(G M D X I)) {
|
|
1725 $num_elements->{$mode} = 0;
|
|
1726 }
|
|
1727
|
|
1728 foreach my $cigar_piece ($cigar_line =~ /(\d*[GMDXI])/g) {
|
|
1729 my ($cig_count, $cig_mode) = $cigar_piece =~ /(\d*)([GMDXI])/;
|
|
1730 $cig_count = 1 if (!defined($cig_count) or $cig_count eq "");
|
|
1731 $num_elements->{$cig_mode} += $cig_count;
|
|
1732 }
|
|
1733 return $num_elements;
|
|
1734 }
|
|
1735
|
|
1736 =head2 _get_sub_cigar_line
|
|
1737
|
|
1738 Arg[1] : ref to array of target cigar_line elements
|
|
1739 Arg[2] : int $offset start position
|
|
1740 Arg[3] : int $length amount to extract
|
|
1741 Arg[4] : int $start_array_index current element in target array
|
|
1742 Arg[5] : int $start_target_pos current position in target coords
|
|
1743 Example : my $new_cigar_line = _get_sub_cigar_line($target_cigar_pieces, $pos, $count);
|
|
1744 Description: Extracts a cigar_line of size $length starting at $offset
|
|
1745 Returntype : string
|
|
1746 Exceptions : None
|
|
1747 Status : At risk
|
|
1748
|
|
1749 =cut
|
|
1750 sub _get_sub_cigar_line {
|
|
1751 my ($target_cigar_pieces, $offset, $length, $start_array_index, $start_target_pos) = @_;
|
|
1752 my $ref_pos = $offset + $length;
|
|
1753
|
|
1754 my $i = $start_array_index;
|
|
1755 my $target_pos = $start_target_pos;
|
|
1756
|
|
1757 #current target element
|
|
1758 my ($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
|
|
1759 $target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
|
|
1760
|
|
1761 my $new_cigar_line = "";
|
|
1762 #check to see if previous target overlaps this ref_pos
|
|
1763 if ($offset) {
|
|
1764 if ($target_pos > $offset) {
|
|
1765
|
|
1766 #need to only add on cig_count amount
|
|
1767 my $new_count;
|
|
1768 if ($target_pos - $offset < $length) {
|
|
1769 $new_count = ($target_pos - $offset);
|
|
1770 } else {
|
|
1771 $new_count = $length;
|
|
1772 }
|
|
1773 #$new_cigar_line .= $new_count . $target_cig_mode;
|
|
1774 $new_cigar_line .= _cigar_element($target_cig_mode,$new_count);
|
|
1775 #print "here1 $target_cig_mode $new_count\n";
|
|
1776 }
|
|
1777 #increment to next target element
|
|
1778 $i++;
|
|
1779 }
|
|
1780 while ($target_pos < $ref_pos && $i < @$target_cigar_pieces) {
|
|
1781 ($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
|
|
1782 $target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
|
|
1783
|
|
1784 #first piece
|
|
1785 if (!$target_pos) {
|
|
1786 if ($target_cig_count >= $length) {
|
|
1787 $new_cigar_line .= _cigar_element($target_cig_mode,$length);
|
|
1788 } else {
|
|
1789 $new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
|
|
1790 }
|
|
1791 } else {
|
|
1792 if ($target_cig_mode ne "I" &&
|
|
1793 $target_cig_count + $target_pos > $ref_pos) {
|
|
1794 #if new target piece extends beyond ref_piece but is not I
|
|
1795 #(since this doesn't count to target_pos) need to shorten it
|
|
1796 my $count = $ref_pos - $target_pos;
|
|
1797 $new_cigar_line .= _cigar_element($target_cig_mode,$count);
|
|
1798 } else {
|
|
1799 $new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
|
|
1800 }
|
|
1801 }
|
|
1802 $target_pos += $target_cig_count unless $target_cig_mode eq "I";
|
|
1803 }
|
|
1804 #need to check if the next element is an I which doesn't count to
|
|
1805 #target_pos but need to add it to cigar_line
|
|
1806 if ($i < @$target_cigar_pieces) {
|
|
1807 ($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
|
|
1808 $target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
|
|
1809 if ($target_cig_mode eq "I") {
|
|
1810 $new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
|
|
1811 }
|
|
1812 }
|
|
1813
|
|
1814 #decrement to return current target element
|
|
1815 if ( $i > 0) {
|
|
1816 $i--;
|
|
1817 }
|
|
1818 return ($new_cigar_line, $i, $target_pos);
|
|
1819 }
|
|
1820
|
|
1821 sub _get_sub_cigar_line_slow {
|
|
1822 my ($target_cigar_pieces, $offset, $length) = @_;
|
|
1823 my $i = 0;
|
|
1824 my $ref_pos = $offset + $length;
|
|
1825 my ($target_cig_count, $target_cig_mode);
|
|
1826 my $target_pos = 0;
|
|
1827
|
|
1828 #skip through target_cigar_line until get to correct position
|
|
1829 while ($target_pos < $offset && $i < @$target_cigar_pieces) {
|
|
1830 ($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
|
|
1831 $target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
|
|
1832
|
|
1833 $target_pos += $target_cig_count unless $target_cig_mode eq "I";
|
|
1834 }
|
|
1835
|
|
1836 my $new_cigar_line = "";
|
|
1837 #check to see if previous target overlaps this ref_pos
|
|
1838 if ($offset) {
|
|
1839 if ($target_pos > $offset) {
|
|
1840
|
|
1841 #need to only add on cig_count amount
|
|
1842 my $new_count;
|
|
1843 if ($target_pos - $offset < $length) {
|
|
1844 $new_count = ($target_pos - $offset);
|
|
1845 } else {
|
|
1846 $new_count = $length;
|
|
1847 }
|
|
1848 #$new_cigar_line .= $new_count . $target_cig_mode;
|
|
1849 $new_cigar_line .= _cigar_element($target_cig_mode,$new_count);
|
|
1850 }
|
|
1851 }
|
|
1852
|
|
1853 while ($target_pos < $ref_pos && $i < @$target_cigar_pieces) {
|
|
1854 ($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
|
|
1855 $target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
|
|
1856
|
|
1857 #first piece
|
|
1858 if (!$target_pos) {
|
|
1859 if ($target_cig_count >= $length) {
|
|
1860 $new_cigar_line .= _cigar_element($target_cig_mode,$length);
|
|
1861 } else {
|
|
1862 $new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
|
|
1863 }
|
|
1864 } else {
|
|
1865 if ($target_cig_mode ne "I" &&
|
|
1866 $target_cig_count + $target_pos > $ref_pos) {
|
|
1867 #if new target piece extends beyond ref_piece but is not I
|
|
1868 #(since this doesn't count to target_pos) need to shorten it
|
|
1869 my $count = $ref_pos - $target_pos;
|
|
1870 $new_cigar_line .= _cigar_element($target_cig_mode,$count);
|
|
1871 } else {
|
|
1872 $new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
|
|
1873 }
|
|
1874 }
|
|
1875 $target_pos += $target_cig_count unless $target_cig_mode eq "I";
|
|
1876 }
|
|
1877 #need to check if the next element is an I which doesn't count to
|
|
1878 #target_pos but need to add it to cigar_line
|
|
1879 if ($i < @$target_cigar_pieces) {
|
|
1880 ($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
|
|
1881 $target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
|
|
1882 if ($target_cig_mode eq "I") {
|
|
1883 $new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
|
|
1884 }
|
|
1885 }
|
|
1886 return $new_cigar_line;
|
|
1887 }
|
|
1888
|
|
1889 =head2 _cigar_element
|
|
1890
|
|
1891 Arg[1] : char $mode valid cigar_line mode
|
|
1892 Arg[2] : int $length size of element
|
|
1893 Example : $elem = _cigar_element("M", 5);
|
|
1894 Description: Creates a valid cigar element
|
|
1895 Returntype : integer
|
|
1896 Exceptions : None
|
|
1897 Status : At risk
|
|
1898
|
|
1899 =cut
|
|
1900 sub _cigar_element {
|
|
1901 my ($mode, $len) = @_;
|
|
1902 my $elem;
|
|
1903 if ($len == 1) {
|
|
1904 $elem = $mode;
|
|
1905 #} elsif ($len > 1) { #length can be 0 if the sequence starts with a gap
|
|
1906 } else { #length can be 0 if the sequence starts with a gap
|
|
1907 $elem = $len.$mode;
|
|
1908 }
|
|
1909 return $elem;
|
|
1910 }
|
|
1911
|
|
1912 =head2 _get_Mapper_from_cigar_line
|
|
1913
|
|
1914 Arg[1] : $cigar_line
|
|
1915 Arg[2] : $alignment_position
|
|
1916 Arg[3] : $sequence_position
|
|
1917 Arg[4] : $relative_strand
|
|
1918 Example : $this_mapper = _get_Mapper_from_cigar_line($cigar_line,
|
|
1919 $aln_pos, $seq_pos, 1);
|
|
1920 Description: creates a new Bio::EnsEMBL::Mapper object for mapping between
|
|
1921 sequence and alignment coordinate systems using the cigar_line
|
|
1922 and starting from the $alignment_position and sequence_position.
|
|
1923 Returntype : Bio::EnsEMBL::Mapper object
|
|
1924 Exceptions : None
|
|
1925 Status : Stable
|
|
1926
|
|
1927 =cut
|
|
1928
|
|
1929 sub _get_Mapper_from_cigar_line {
|
|
1930 my ($cigar_line, $alignment_position, $sequence_position, $rel_strand) = @_;
|
|
1931
|
|
1932 my $mapper = Bio::EnsEMBL::Mapper->new("sequence", "alignment");
|
|
1933
|
|
1934 my @cigar_pieces = ($cigar_line =~ /(\d*[GMDXI])/g);
|
|
1935 if ($rel_strand == 1) {
|
|
1936 foreach my $cigar_piece (@cigar_pieces) {
|
|
1937 my $cigar_type = substr($cigar_piece, -1, 1);
|
|
1938 my $cigar_count = substr($cigar_piece, 0, -1);
|
|
1939 $cigar_count = 1 if ($cigar_count eq "");
|
|
1940 next if ($cigar_count < 1);
|
|
1941
|
|
1942 if( $cigar_type eq "M" ) {
|
|
1943 $mapper->add_map_coordinates(
|
|
1944 "sequence", #$self->dbID,
|
|
1945 $sequence_position,
|
|
1946 $sequence_position + $cigar_count - 1,
|
|
1947 $rel_strand,
|
|
1948 "alignment", #$self->genomic_align_block->dbID,
|
|
1949 $alignment_position,
|
|
1950 $alignment_position + $cigar_count - 1
|
|
1951 );
|
|
1952 $sequence_position += $cigar_count;
|
|
1953 $alignment_position += $cigar_count;
|
|
1954 } elsif( $cigar_type eq "I") {
|
|
1955 #add to sequence_position but not alignment_position
|
|
1956 $sequence_position += $cigar_count;
|
|
1957 } elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
|
|
1958 $alignment_position += $cigar_count;
|
|
1959 }
|
|
1960 }
|
|
1961 } else {
|
|
1962 foreach my $cigar_piece (@cigar_pieces) {
|
|
1963 my $cigar_type = substr($cigar_piece, -1, 1);
|
|
1964 my $cigar_count = substr($cigar_piece, 0 ,-1);
|
|
1965 $cigar_count = 1 if ($cigar_count eq "");
|
|
1966 next if ($cigar_count < 1);
|
|
1967
|
|
1968 if( $cigar_type eq "M" ) {
|
|
1969 $mapper->add_map_coordinates(
|
|
1970 "sequence", #$self->dbID,
|
|
1971 $sequence_position - $cigar_count + 1,
|
|
1972 $sequence_position,
|
|
1973 $rel_strand,
|
|
1974 "alignment", #$self->genomic_align_block->dbID,
|
|
1975 $alignment_position,
|
|
1976 $alignment_position + $cigar_count - 1
|
|
1977 );
|
|
1978 $sequence_position -= $cigar_count;
|
|
1979 $alignment_position += $cigar_count;
|
|
1980 } elsif( $cigar_type eq "I") {
|
|
1981 #add to sequence_position but not alignment_position
|
|
1982 $sequence_position -= $cigar_count;
|
|
1983 } elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
|
|
1984 $alignment_position += $cigar_count;
|
|
1985 }
|
|
1986 }
|
|
1987 }
|
|
1988
|
|
1989 return $mapper;
|
|
1990 }
|
|
1991
|
|
1992 sub _add_cigar_line_to_Mapper {
|
|
1993 my ($cigar_line, $alignment_position, $sequence_position, $rel_strand, $mapper) = @_;
|
|
1994
|
|
1995 my @cigar_pieces = ($cigar_line =~ /(\d*[GMDXI])/g);
|
|
1996 if ($rel_strand == 1) {
|
|
1997 foreach my $cigar_piece (@cigar_pieces) {
|
|
1998 my $cigar_type = substr($cigar_piece, -1, 1 );
|
|
1999 my $cigar_count = substr($cigar_piece, 0 ,-1 );
|
|
2000 $cigar_count = 1 unless ($cigar_count =~ /^\d+$/);
|
|
2001 next if ($cigar_count < 1);
|
|
2002
|
|
2003 if( $cigar_type eq "M" ) {
|
|
2004 $mapper->add_map_coordinates(
|
|
2005 "sequence", #$self->dbID,
|
|
2006 $sequence_position,
|
|
2007 $sequence_position + $cigar_count - 1,
|
|
2008 $rel_strand,
|
|
2009 "alignment", #$self->genomic_align_block->dbID,
|
|
2010 $alignment_position,
|
|
2011 $alignment_position + $cigar_count - 1
|
|
2012 );
|
|
2013 $sequence_position += $cigar_count;
|
|
2014 $alignment_position += $cigar_count;
|
|
2015 } elsif( $cigar_type eq "I") {
|
|
2016 #add to sequence_position but not alignment_position
|
|
2017 $sequence_position += $cigar_count;
|
|
2018 } elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
|
|
2019 $alignment_position += $cigar_count;
|
|
2020 }
|
|
2021 }
|
|
2022 } else {
|
|
2023 foreach my $cigar_piece (@cigar_pieces) {
|
|
2024 my $cigar_type = substr($cigar_piece, -1, 1 );
|
|
2025 my $cigar_count = substr($cigar_piece, 0 ,-1 );
|
|
2026 $cigar_count = 1 unless ($cigar_count =~ /^\d+$/);
|
|
2027 next if ($cigar_count < 1);
|
|
2028
|
|
2029 if( $cigar_type eq "M" ) {
|
|
2030 $mapper->add_map_coordinates(
|
|
2031 "sequence", #$self->dbID,
|
|
2032 $sequence_position - $cigar_count + 1,
|
|
2033 $sequence_position,
|
|
2034 $rel_strand,
|
|
2035 "alignment", #$self->genomic_align_block->dbID,
|
|
2036 $alignment_position,
|
|
2037 $alignment_position + $cigar_count - 1
|
|
2038 );
|
|
2039 $sequence_position -= $cigar_count;
|
|
2040 $alignment_position += $cigar_count;
|
|
2041 } elsif( $cigar_type eq "I") {
|
|
2042 #add to sequence_position but not alignment_position
|
|
2043 $sequence_position -= $cigar_count;
|
|
2044 } elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
|
|
2045 $alignment_position += $cigar_count;
|
|
2046 }
|
|
2047 }
|
|
2048 }
|
|
2049
|
|
2050 return $mapper;
|
|
2051 }
|
|
2052
|
|
2053
|
|
2054 =head2 get_Slice
|
|
2055
|
|
2056 Arg[1] : -none-
|
|
2057 Example : $slice = $genomic_align->get_Slice();
|
|
2058 Description: creates and returns a Bio::EnsEMBL::Slice which corresponds to
|
|
2059 this Bio::EnsEMBL::Compara::GenomicAlign
|
|
2060 Returntype : Bio::EnsEMBL::Slice object
|
|
2061 Exceptions : return -undef- if slice cannot be created (this is likely to
|
|
2062 happen if the Registry is misconfigured)
|
|
2063 Status : Stable
|
|
2064
|
|
2065 =cut
|
|
2066
|
|
2067 sub get_Slice {
|
|
2068 my ($self) = @_;
|
|
2069
|
|
2070 my $slice = $self->dnafrag->slice;
|
|
2071 return undef if (!defined($slice));
|
|
2072
|
|
2073 $slice = $slice->sub_Slice(
|
|
2074 $self->dnafrag_start,
|
|
2075 $self->dnafrag_end,
|
|
2076 $self->dnafrag_strand
|
|
2077 );
|
|
2078
|
|
2079 return $slice;
|
|
2080 }
|
|
2081
|
|
2082
|
|
2083 =head2 restrict
|
|
2084
|
|
2085 Arg[1] : int start
|
|
2086 Arg[1] : int end
|
|
2087 Example : my $genomic_align = $genomic_align->restrict(10, 20);
|
|
2088 Description: restrict (trim) this GenomicAlign to the start and end
|
|
2089 positions (in alignment coordinates). If no trimming is
|
|
2090 required, the original object is returned instead.
|
|
2091 Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
|
|
2092 Exceptions :
|
|
2093 Status : At risk
|
|
2094
|
|
2095 =cut
|
|
2096
|
|
2097 sub restrict {
|
|
2098 my ($self, $start, $end, $aligned_seq_length) = @_;
|
|
2099 throw("Wrong arguments") if (!$start or !$end);
|
|
2100
|
|
2101 my $restricted_genomic_align = $self->copy();
|
|
2102 delete($restricted_genomic_align->{dbID});
|
|
2103 delete($restricted_genomic_align->{genomic_align_block_id});
|
|
2104 delete($restricted_genomic_align->{original_sequence});
|
|
2105 delete($restricted_genomic_align->{aligned_sequence});
|
|
2106 delete($restricted_genomic_align->{cigar_line});
|
|
2107 $restricted_genomic_align->{original_dbID} = $self->dbID if ($self->dbID);
|
|
2108
|
|
2109 # Need to calculate the original aligned sequence length myself
|
|
2110 if (!$aligned_seq_length) {
|
|
2111 my @cigar = grep {$_} split(/(\d*[GDMXI])/, $self->cigar_line);
|
|
2112 foreach my $num_type (@cigar) {
|
|
2113 my $type = substr($num_type, -1, 1, "");
|
|
2114 $num_type = 1 if ($num_type eq "");
|
|
2115 $aligned_seq_length += $num_type unless ($type eq "I");
|
|
2116 }
|
|
2117 }
|
|
2118
|
|
2119 my $final_aligned_length = $end - $start + 1;
|
|
2120 my $number_of_columns_to_trim_from_the_start = $start - 1;
|
|
2121 my $number_of_columns_to_trim_from_the_end = $aligned_seq_length - $end;
|
|
2122
|
|
2123 my @cigar = grep {$_} split(/(\d*[GDMXI])/, $self->cigar_line);
|
|
2124
|
|
2125 ## Trim start of cigar_line if needed
|
|
2126 if ($number_of_columns_to_trim_from_the_start >= 0) {
|
|
2127 my $counter_of_trimmed_columns_from_the_start = 0;
|
|
2128 my $counter_of_trimmed_base_pairs = 0; # num of bp we trim (from the start)
|
|
2129 ## Loop through the cigar pieces
|
|
2130 while (my $cigar = shift(@cigar)) {
|
|
2131 # Parse each cigar piece
|
|
2132 my $type = substr( $cigar, -1, 1 );
|
|
2133 my $num = substr( $cigar, 0 ,-1 );
|
|
2134 $num = 1 if ($num eq "");
|
|
2135
|
|
2136 # Insertions are not part of the alignment, don't count them
|
|
2137 if ($type ne "I") {
|
|
2138 $counter_of_trimmed_columns_from_the_start += $num;
|
|
2139 }
|
|
2140
|
|
2141 # Matches and insertions are actual base pairs in the sequence
|
|
2142 if ($type eq "M" || $type eq "I") {
|
|
2143 $counter_of_trimmed_base_pairs += $num;
|
|
2144 }
|
|
2145
|
|
2146 # If this cigar piece is too long and we overshoot the number of columns we want to trim,
|
|
2147 # we substitute this cigar piece by a shorter one
|
|
2148 if ($counter_of_trimmed_columns_from_the_start >= $number_of_columns_to_trim_from_the_start) {
|
|
2149 my $new_cigar_piece;
|
|
2150 # length of the new cigar piece
|
|
2151 my $length = $counter_of_trimmed_columns_from_the_start - $number_of_columns_to_trim_from_the_start;
|
|
2152 if ($length > 1) {
|
|
2153 $new_cigar_piece = $length.$type;
|
|
2154 } elsif ($length == 1) {
|
|
2155 $new_cigar_piece = $type;
|
|
2156 }
|
|
2157 unshift(@cigar, $new_cigar_piece) if ($new_cigar_piece);
|
|
2158 if ($type eq "M") {
|
|
2159 $counter_of_trimmed_base_pairs -= $length;
|
|
2160 }
|
|
2161
|
|
2162 ## We don't want to start with an insertion. Trim it!
|
|
2163 while (@cigar and $cigar[0] =~ /[I]/) {
|
|
2164 my ($num, $type) = ($cigar[0] =~ /^(\d*)([DIGMX])/);
|
|
2165 #my $type = substr( $cigar, -1, 1 );
|
|
2166 #my $num = substr( $cigar, 0 ,-1 );
|
|
2167 $num = 1 if ($num eq "");
|
|
2168 $counter_of_trimmed_base_pairs += $num;
|
|
2169 shift(@cigar);
|
|
2170 }
|
|
2171 last;
|
|
2172 }
|
|
2173 }
|
|
2174 if ($self->dnafrag_strand == 1) {
|
|
2175 $restricted_genomic_align->dnafrag_start($self->dnafrag_start + $counter_of_trimmed_base_pairs);
|
|
2176 } else {
|
|
2177 $restricted_genomic_align->dnafrag_end($self->dnafrag_end - $counter_of_trimmed_base_pairs);
|
|
2178 }
|
|
2179 }
|
|
2180
|
|
2181 ## Trim end of cigar_line if needed
|
|
2182 if ($number_of_columns_to_trim_from_the_end >= 0) {
|
|
2183 my $counter_of_trimmed_columns_from_the_end = 0;
|
|
2184 my $counter_of_trimmed_base_pairs = 0; # num of bp we trim (from the start)
|
|
2185 ## Loop through the cigar pieces
|
|
2186 while (my $cigar = pop(@cigar)) {
|
|
2187 # Parse each cigar piece
|
|
2188 my $type = substr( $cigar, -1, 1 );
|
|
2189 my $num = substr( $cigar, 0 ,-1 );
|
|
2190 $num = 1 if ($num eq "");
|
|
2191
|
|
2192 # Insertions are not part of the alignment, don't count them
|
|
2193 if ($type ne "I") {
|
|
2194 $counter_of_trimmed_columns_from_the_end += $num;
|
|
2195 }
|
|
2196
|
|
2197 # Matches and insertions are actual base pairs in the sequence
|
|
2198 if ($type eq "M" || $type eq "I") {
|
|
2199 $counter_of_trimmed_base_pairs += $num;
|
|
2200 }
|
|
2201 # If this cigar piece is too long and we overshoot the number of columns we want to trim,
|
|
2202 # we substitute this cigar piece by a shorter one
|
|
2203 if ($counter_of_trimmed_columns_from_the_end >= $number_of_columns_to_trim_from_the_end) {
|
|
2204 my $new_cigar_piece;
|
|
2205 # length of the new cigar piece
|
|
2206 my $length = $counter_of_trimmed_columns_from_the_end - $number_of_columns_to_trim_from_the_end;
|
|
2207 if ($length > 1) {
|
|
2208 $new_cigar_piece = $length.$type;
|
|
2209 } elsif ($length == 1) {
|
|
2210 $new_cigar_piece = $type;
|
|
2211 }
|
|
2212 push(@cigar, $new_cigar_piece) if ($new_cigar_piece);
|
|
2213 if ($type eq "M") {
|
|
2214 $counter_of_trimmed_base_pairs -= $length;
|
|
2215 }
|
|
2216
|
|
2217 ## We don't want to end with an insertion. Trim it!
|
|
2218 while (@cigar and $cigar[-1] =~ /[I]/) {
|
|
2219 my ($num, $type) = ($cigar[-1] =~ /^(\d*)([DIGMX])/);
|
|
2220 #my $type = substr( $cigar, -1, 1 );
|
|
2221 #my $num = substr( $cigar, 0 ,-1 );
|
|
2222 $num = 1 if ($num eq "");
|
|
2223 $counter_of_trimmed_base_pairs += $num;
|
|
2224 pop(@cigar);
|
|
2225 }
|
|
2226 last;
|
|
2227 }
|
|
2228 }
|
|
2229 if ($self->dnafrag_strand == 1) {
|
|
2230 $restricted_genomic_align->dnafrag_end($restricted_genomic_align->dnafrag_end - $counter_of_trimmed_base_pairs);
|
|
2231 } else {
|
|
2232 $restricted_genomic_align->dnafrag_start($restricted_genomic_align->dnafrag_start + $counter_of_trimmed_base_pairs);
|
|
2233 }
|
|
2234 }
|
|
2235
|
|
2236 ## Save genomic_align's cigar_line
|
|
2237 $restricted_genomic_align->aligned_sequence(0);
|
|
2238 $restricted_genomic_align->cigar_line(join("", @cigar));
|
|
2239
|
|
2240 return $restricted_genomic_align;
|
|
2241 }
|
|
2242
|
|
2243 1;
|