0
|
1 # $Id: GeneMapper.pm,v 1.13.2.2 2003/03/13 11:56:30 heikki Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::Coordinate::GeneMapper
|
|
4 #
|
|
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
|
|
6 #
|
|
7 # Copyright Heikki Lehvaslaiho
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Coordinate::GeneMapper - transformations between gene related coordinate systems
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Coordinate::GeneMapper;
|
|
20
|
|
21 # get a Bio::RangeI representing the start, end and strand of the CDS
|
|
22 # in chromosomal (or entry) coordinates
|
|
23 my $cds;
|
|
24
|
|
25 # get a Bio::Location::Split or an array of Bio::LocationI objects
|
|
26 # holding the start, end and strand of all the exons in chromosomal
|
|
27 # (or entry) coordinates
|
|
28 my $exons;
|
|
29
|
|
30 # create a gene mapper and set it to map from chromosomal to cds coordinates
|
|
31 my $gene = Bio::Coordinate::GeneMapper->new(-in=>'chr',
|
|
32 -out=>'cds',
|
|
33 -cds=>$cds,
|
|
34 -exons=>$exons
|
|
35 );
|
|
36
|
|
37 # get a a Bio::Location or sequence feature in input (chr) coordinates
|
|
38 my $loc;
|
|
39
|
|
40 # map the location into output coordinates and get a new location object
|
|
41 $newloc = $gene->map($loc);
|
|
42
|
|
43
|
|
44 =head1 DESCRIPTION
|
|
45
|
|
46 Bio::Coordinate::GeneMapper is a module for simplifying the mappings
|
|
47 of coodinate locations between various gene related locations in human
|
|
48 genetics. It also adds a special human genetics twist to coordinate
|
|
49 systems by making it possible to disable the use of zero
|
|
50 (0). Locations before position one start from -1. See method
|
|
51 L<nozero>.
|
|
52
|
|
53 It understands by name the following coordinate systems and mapping
|
|
54 between them:
|
|
55
|
|
56 peptide (peptide length)
|
|
57 ^
|
|
58 | -peptide_offset
|
|
59 |
|
|
60 frame propeptide (propeptide length)
|
|
61 ^ ^
|
|
62 \ |
|
|
63 translate \ |
|
|
64 \ |
|
|
65 cds (transcript start and end)
|
|
66 ^
|
|
67 negative_intron | \
|
|
68 ^ | \ transcribe
|
|
69 \ | \
|
|
70 intron exon \
|
|
71 ^ ^ ^ /
|
|
72 splice \ \ / | /
|
|
73 \ \ / | /
|
|
74 \ inex | /
|
|
75 \ ^ | /
|
|
76 \ \ |/
|
|
77 ----- gene (gene_length)
|
|
78 ^
|
|
79 | - gene_offset
|
|
80 |
|
|
81 chr (or entry)
|
|
82
|
|
83
|
|
84 This structure is kept in the global variable $DAG which is a
|
|
85 representation of a Directed Acyclic Graph. The path calculations
|
|
86 traversing this graph are done in a helper class. See
|
|
87 L<Bio::Coordinate::Graph>.
|
|
88
|
|
89 Of these, two operations are special cases, translate and splice.
|
|
90 Translating and reverse translating are implemented as internal
|
|
91 methods that do the simple 1E<lt>-E<gt>3 conversion. Splicing needs
|
|
92 additional information that is provided by method L<exons> which takes
|
|
93 in an array of Bio::LocationI objects.
|
|
94
|
|
95 Most of the coordinate system names should be selfexplanatory to
|
|
96 anyone familiar with genes. Negative intron coordinate system is
|
|
97 starts counting backwards from -1 as the last nucleotide in the
|
|
98 intron. This used when only exon and a few flanking intron nucleotides
|
|
99 are known.
|
|
100
|
|
101
|
|
102 This class models coordinates within one transcript of a gene, so to
|
|
103 tackle multiple transcripts you need several instances of the
|
|
104 class. It is therefore valid to argue that the name of the class
|
|
105 should be TranscriptMapper. GeneMapper is a catchier name, so it
|
|
106 stuck.
|
|
107
|
|
108
|
|
109 =head1 FEEDBACK
|
|
110
|
|
111 =head2 Mailing Lists
|
|
112
|
|
113 User feedback is an integral part of the evolution of this and other
|
|
114 Bioperl modules. Send your comments and suggestions preferably to the
|
|
115 Bioperl mailing lists Your participation is much appreciated.
|
|
116
|
|
117 bioperl-l@bioperl.org - General discussion
|
|
118 http://bio.perl.org/MailList.html - About the mailing lists
|
|
119
|
|
120 =head2 Reporting Bugs
|
|
121
|
|
122 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
123 the bugs and their resolution. Bug reports can be submitted via email
|
|
124 or the web:
|
|
125
|
|
126 bioperl-bugs@bio.perl.org
|
|
127 http://bugzilla.bioperl.org/
|
|
128
|
|
129 =head1 AUTHOR - Heikki Lehvaslaiho
|
|
130
|
|
131 Email: heikki@ebi.ac.uk
|
|
132 Address:
|
|
133
|
|
134 EMBL Outstation, European Bioinformatics Institute
|
|
135 Wellcome Trust Genome Campus, Hinxton
|
|
136 Cambs. CB10 1SD, United Kingdom
|
|
137
|
|
138 =head1 APPENDIX
|
|
139
|
|
140 The rest of the documentation details each of the object
|
|
141 methods. Internal methods are usually preceded with a _
|
|
142
|
|
143 =cut
|
|
144
|
|
145
|
|
146 # Let the code begin...
|
|
147
|
|
148 package Bio::Coordinate::GeneMapper;
|
|
149 use vars qw(@ISA %COORDINATE_SYSTEMS %COORDINATE_INTS $TRANSLATION $DAG
|
|
150 $NOZERO_VALUES $NOZERO_KEYS);
|
|
151 use strict;
|
|
152
|
|
153 # Object preamble - inherits from Bio::Root::Root
|
|
154
|
|
155 use Bio::Root::Root;
|
|
156 use Bio::Coordinate::Result;
|
|
157 use Bio::Location::Simple;
|
|
158 use Bio::Coordinate::Graph;
|
|
159 use Bio::Coordinate::Collection;
|
|
160 use Bio::Coordinate::Pair;
|
|
161 use Bio::Coordinate::ExtrapolatingPair;
|
|
162
|
|
163 @ISA = qw(Bio::Root::Root Bio::Coordinate::MapperI);
|
|
164
|
|
165 # first set internal values for all translation tables
|
|
166
|
|
167 %COORDINATE_SYSTEMS = (
|
|
168 peptide => 10,
|
|
169 propeptide => 9,
|
|
170 frame => 8,
|
|
171 cds => 7,
|
|
172 negative_intron => 6,
|
|
173 intron => 5,
|
|
174 exon => 4,
|
|
175 inex => 3,
|
|
176 gene => 2,
|
|
177 chr => 1
|
|
178 );
|
|
179
|
|
180 %COORDINATE_INTS = (
|
|
181 10 => 'peptide',
|
|
182 9 => 'propeptide',
|
|
183 8 => 'frame',
|
|
184 7 => 'cds',
|
|
185 6 => 'negative_intron',
|
|
186 5 => 'intron',
|
|
187 4 => 'exon',
|
|
188 3 => 'inex',
|
|
189 2 => 'gene',
|
|
190 1 => 'chr'
|
|
191 );
|
|
192
|
|
193 $TRANSLATION = $COORDINATE_SYSTEMS{'cds'}. "-".
|
|
194 $COORDINATE_SYSTEMS{'propeptide'};
|
|
195
|
|
196 $DAG = {
|
|
197 10 => [],
|
|
198 9 => [10],
|
|
199 8 => [],
|
|
200 7 => [8, 9],
|
|
201 6 => [],
|
|
202 5 => [6],
|
|
203 4 => [7],
|
|
204 3 => [4, 5],
|
|
205 2 => [3, 4, 5, 7],
|
|
206 1 => [2]
|
|
207 };
|
|
208
|
|
209 $NOZERO_VALUES = {0 => 0, 'in' => 1, 'out' => 2, 'in&out' => 3 };
|
|
210 $NOZERO_KEYS = { 0 => 0, 1 => 'in', 2 => 'out', 3 => 'in&out' };
|
|
211
|
|
212
|
|
213 sub new {
|
|
214 my($class,@args) = @_;
|
|
215 my $self = $class->SUPER::new(@args);
|
|
216
|
|
217 # prime the graph
|
|
218 my $graph = new Bio::Coordinate::Graph;
|
|
219 $graph->hash_of_arrays($DAG);
|
|
220 $self->graph($graph);
|
|
221
|
|
222 my($in, $out, $peptide_offset, $exons,
|
|
223 $cds, $nozero, $strict) =
|
|
224 $self->_rearrange([qw(IN
|
|
225 OUT
|
|
226 PEPTIDE_OFFSET
|
|
227 EXONS
|
|
228 CDS
|
|
229 NOZERO
|
|
230 STRICT
|
|
231 )],
|
|
232 @args);
|
|
233
|
|
234 # direction of mapping when going chr to protein
|
|
235 $self->{_direction} = 1;
|
|
236
|
|
237 $in && $self->in($in);
|
|
238 $out && $self->out($out);
|
|
239 $cds && $self->cds($cds);
|
|
240 $exons && ref($exons) =~ /ARRAY/i && $self->exons(@$exons);
|
|
241 $peptide_offset && $self->peptide_offset($peptide_offset);
|
|
242 $nozero && $self->nozero($nozero);
|
|
243 $strict && $self->strict($strict);
|
|
244
|
|
245 return $self; # success - we hope!
|
|
246 }
|
|
247
|
|
248 =head2 in
|
|
249
|
|
250 Title : in
|
|
251 Usage : $obj->in('peptide');
|
|
252 Function: Set and read the input coordinate system.
|
|
253 Example :
|
|
254 Returns : value of input system
|
|
255 Args : new value (optional)
|
|
256
|
|
257 =cut
|
|
258
|
|
259 sub in {
|
|
260 my ($self,$value) = @_;
|
|
261 if( defined $value) {
|
|
262 $self->throw("Not a valid input coordinate system name [$value]\n".
|
|
263 "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
|
|
264 unless defined $COORDINATE_SYSTEMS{$value};
|
|
265
|
|
266 $self->{'_in'} = $COORDINATE_SYSTEMS{$value};
|
|
267 }
|
|
268 return $COORDINATE_INTS{ $self->{'_in'} };
|
|
269 }
|
|
270
|
|
271
|
|
272 =head2 out
|
|
273
|
|
274 Title : out
|
|
275 Usage : $obj->out('peptide');
|
|
276 Function: Set and read the output coordinate system.
|
|
277 Example :
|
|
278 Returns : value of output system
|
|
279 Args : new value (optional)
|
|
280
|
|
281 =cut
|
|
282
|
|
283 sub out {
|
|
284 my ($self,$value) = @_;
|
|
285 if( defined $value) {
|
|
286 $self->throw("Not a valid input coordinate system name [$value]\n".
|
|
287 "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
|
|
288 unless defined $COORDINATE_SYSTEMS{$value};
|
|
289
|
|
290 $self->{'_out'} = $COORDINATE_SYSTEMS{$value};
|
|
291 }
|
|
292 return $COORDINATE_INTS{ $self->{'_out'} };
|
|
293 }
|
|
294
|
|
295 =head2 strict
|
|
296
|
|
297 Title : strict
|
|
298 Usage : $obj->strict('peptide');
|
|
299 Function: Set and read weather strict boundaried of coordinate
|
|
300 systems are enforced.
|
|
301 When strict is on, the end of the coordinate range must be defined.
|
|
302 Example :
|
|
303 Returns : boolean
|
|
304 Args : boolean (optional)
|
|
305
|
|
306 =cut
|
|
307
|
|
308 sub strict {
|
|
309 my ($self,$value) = @_;
|
|
310 if( defined $value) {
|
|
311 $value ? ( $self->{'_strict'} = 1 ) : ( $self->{'_strict'} = 0 );
|
|
312 ## update in each mapper !!
|
|
313 }
|
|
314 return $self->{'_strict'} || 0 ;
|
|
315 }
|
|
316
|
|
317
|
|
318 =head2 nozero
|
|
319
|
|
320 Title : nozero
|
|
321 Usage : $obj->nozero(1);
|
|
322 Function: Flag to disable the use of zero in the input,
|
|
323 output or both coordinate systems. Use of coordinate
|
|
324 systems without zero is a peculiarity common in
|
|
325 human genetics community.
|
|
326 Example :
|
|
327 Returns : 0 (default), or 'in', 'out', 'in&out'
|
|
328 Args : 0 (default), or 'in', 'out', 'in&out'
|
|
329
|
|
330 =cut
|
|
331
|
|
332 sub nozero {
|
|
333 my ($self,$value) = @_;
|
|
334
|
|
335 if (defined $value) {
|
|
336 $self->throw("Not a valid value for nozero [$value]\n".
|
|
337 "Valid values are ". join(", ", keys %{$NOZERO_VALUES} ))
|
|
338 unless defined $NOZERO_VALUES->{$value};
|
|
339 $self->{'_nozero'} = $NOZERO_VALUES->{$value};
|
|
340 }
|
|
341
|
|
342 my $res = $self->{'_nozero'} || 0;
|
|
343 return $NOZERO_KEYS->{$res};
|
|
344 }
|
|
345
|
|
346 =head2 graph
|
|
347
|
|
348 Title : graph
|
|
349 Usage : $obj->graph($new_graph);
|
|
350 Function: Set and read the graph object representing relationships
|
|
351 between coordinate systems
|
|
352 Example :
|
|
353 Returns : Bio::Coordinate::Graph object
|
|
354 Args : new Bio::Coordinate::Graph object (optional)
|
|
355
|
|
356 =cut
|
|
357
|
|
358 sub graph {
|
|
359 my ($self,$value) = @_;
|
|
360 if( defined $value) {
|
|
361 $self->throw("Not a valid graph [$value]\n")
|
|
362 unless $value->isa('Bio::Coordinate::Graph');
|
|
363 $self->{'_graph'} = $value;
|
|
364 }
|
|
365 return $self->{'_graph'};
|
|
366 }
|
|
367
|
|
368 =head2 peptide
|
|
369
|
|
370 Title : peptide
|
|
371 Usage : $obj->peptide_offset($peptide_coord);
|
|
372 Function: Read and write the offset of peptide from the start of propeptide
|
|
373 and peptide length
|
|
374 Returns : a Bio::Location::Simple object
|
|
375 Args : a Bio::LocationI object
|
|
376
|
|
377 =cut
|
|
378
|
|
379 sub peptide {
|
|
380 my ($self, $value) = @_;
|
|
381 if( defined $value) {
|
|
382 $self->throw("I need a Bio::LocationI, not [". $value. "]")
|
|
383 unless $value->isa('Bio::LocationI');
|
|
384
|
|
385 $self->throw("Peptide start not defined")
|
|
386 unless defined $value->start;
|
|
387 $self->{'_peptide_offset'} = $value->start - 1;
|
|
388
|
|
389 $self->throw("Peptide end not defined")
|
|
390 unless defined $value->end;
|
|
391 $self->{'_peptide_length'} = $value->end - $self->{'_peptide_offset'};
|
|
392
|
|
393
|
|
394 my $a = $self->_create_pair
|
|
395 ('propeptide', 'peptide', $self->strict,
|
|
396 $self->{'_peptide_offset'}, $self->{'_peptide_length'} );
|
|
397 my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'};
|
|
398 $self->{'_mappers'}->{$mapper} = $a;
|
|
399 }
|
|
400 return Bio::Location::Simple->new
|
|
401 (-seq_id => 'propeptide',
|
|
402 -start => $self->{'_peptide_offset'} + 1 ,
|
|
403 -end => $self->{'_peptide_length'} + $self->{'_peptide_offset'},
|
|
404 -strand => 1
|
|
405 );
|
|
406 }
|
|
407
|
|
408 =head2 peptide_offset
|
|
409
|
|
410 Title : peptide_offset
|
|
411 Usage : $obj->peptide_offset(20);
|
|
412 Function: Set and read the offset of peptide from the start of propeptide
|
|
413 Returns : set value or 0
|
|
414 Args : new value (optional)
|
|
415
|
|
416 =cut
|
|
417
|
|
418 sub peptide_offset {
|
|
419 my ($self,$offset, $len) = @_;
|
|
420 if( defined $offset) {
|
|
421 $self->throw("I need an integer, not [$offset]")
|
|
422 unless $offset =~ /^[+-]?\d+$/;
|
|
423 $self->{'_peptide_offset'} = $offset;
|
|
424
|
|
425 if (defined $len) {
|
|
426 $self->throw("I need an integer, not [$len]")
|
|
427 unless $len =~ /^[+-]?\d+$/;
|
|
428 $self->{'_peptide_length'} = $len;
|
|
429 }
|
|
430
|
|
431 my $a = $self->_create_pair
|
|
432 ('propeptide', 'peptide', $self->strict, $offset, $self->{'_peptide_length'} );
|
|
433 my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'};
|
|
434 $self->{'_mappers'}->{$mapper} = $a;
|
|
435 }
|
|
436 return $self->{'_peptide_offset'} || 0;
|
|
437 }
|
|
438
|
|
439 =head2 peptide_length
|
|
440
|
|
441 Title : peptide_length
|
|
442 Usage : $obj->peptide_length(20);
|
|
443 Function: Set and read the offset of peptide from the start of propeptide
|
|
444 Returns : set value or 0
|
|
445 Args : new value (optional)
|
|
446
|
|
447 =cut
|
|
448
|
|
449
|
|
450 sub peptide_length {
|
|
451 my ($self, $len) = @_;
|
|
452 if( defined $len) {
|
|
453 $self->throw("I need an integer, not [$len]")
|
|
454 if defined $len && $len !~ /^[+-]?\d+$/;
|
|
455 $self->{'_peptide_length'} = $len;
|
|
456 }
|
|
457 return $self->{'_peptide_length'};
|
|
458 }
|
|
459
|
|
460
|
|
461 =head2 exons
|
|
462
|
|
463 Title : exons
|
|
464 Usage : $obj->exons(@exons);
|
|
465 Function: Set and read the offset of CDS from the start of transcipt
|
|
466 You do not have to sort the exons before calling this method as
|
|
467 they will be sorted automatically.
|
|
468 If you have not defined the CDS, is will be set to span all
|
|
469 exons here.
|
|
470 Returns : array of Bio::LocationI exons in genome coordinates or 0
|
|
471 Args : array of Bio::LocationI exons in genome (or entry) coordinates
|
|
472
|
|
473 =cut
|
|
474
|
|
475 sub exons {
|
|
476 my ($self,@value) = @_;
|
|
477 my $cds_mapper = $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'cds'};
|
|
478 my $inex_mapper =
|
|
479 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'inex'};
|
|
480 my $exon_mapper =
|
|
481 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'exon'};
|
|
482 my $intron_mapper =
|
|
483 $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'intron'};
|
|
484 my $negative_intron_mapper =
|
|
485 $COORDINATE_SYSTEMS{'intron'}. "-". $COORDINATE_SYSTEMS{'negative_intron'};
|
|
486 my $exon_cds_mapper = $COORDINATE_SYSTEMS{'exon'}. "-". $COORDINATE_SYSTEMS{'cds'};
|
|
487
|
|
488 if(@value) {
|
|
489 if (ref($value[0]) &&
|
|
490 $value[0]->isa('Bio::SeqFeatureI') and
|
|
491 $value[0]->location->isa('Bio::Location::SplitLocationI')) {
|
|
492 @value = $value[0]->location->each_Location;
|
|
493 } else {
|
|
494 $self->throw("I need an array , not [@value]")
|
|
495 unless ref \@value eq 'ARRAY';
|
|
496 $self->throw("I need a reference to an array of Bio::LocationIs, not to [".
|
|
497 $value[0]. "]")
|
|
498 unless ref $value[0] and $value[0]->isa('Bio::LocationI');
|
|
499 }
|
|
500
|
|
501 #
|
|
502 # sort the input array
|
|
503 #
|
|
504 # and if the used has not defined CDS assume it is the complete exonic range
|
|
505 if (defined $value[0]->strand && $value[0]->strand == - 1) { #reverse strand
|
|
506 @value = map { $_->[0] }
|
|
507 sort { $b->[1] <=> $a->[1] }
|
|
508 map { [ $_, $_->start] }
|
|
509 @value;
|
|
510
|
|
511 unless ($self->cds) {
|
|
512 $self->cds(new Bio::Location::Simple(-start => $value[-1]->start,
|
|
513 -end => $value[0]->end,
|
|
514 -strand=> $value[0]->strand,
|
|
515 -seq_id=> $value[0]->seq_id,
|
|
516 )
|
|
517 );
|
|
518 }
|
|
519 } else { #undef or forward strand
|
|
520 @value = map { $_->[0] }
|
|
521 sort { $a->[1] <=> $b->[1] }
|
|
522 map { [ $_, $_->start] }
|
|
523 @value;
|
|
524 unless ($self->cds) {
|
|
525 $self->cds(new Bio::Location::Simple(-start => $value[0]->start,
|
|
526 -end => $value[-1]->end,
|
|
527 -strand=> $value[0]->strand,
|
|
528 -seq_id=> $value[0]->seq_id,
|
|
529 )
|
|
530 );
|
|
531 }
|
|
532
|
|
533 }
|
|
534
|
|
535 $self->{'_chr_exons'} = \@value;
|
|
536
|
|
537 # transform exons from chromosome to gene coordinates
|
|
538 # but only if gene coordinate system has been set
|
|
539 my @exons ;
|
|
540 #my $gene_mapper = $self->$COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
|
|
541 my $gene_mapper = "1-2";
|
|
542 if (defined $self->{'_mappers'}->{$gene_mapper} ) {
|
|
543
|
|
544 my $tmp_in = $self->{'_in'};
|
|
545 my $tmp_out = $self->{'_out'};
|
|
546 my $tmp_verb = $self->verbose;
|
|
547 $self->verbose(0);
|
|
548
|
|
549 $self->in('chr');
|
|
550 $self->out('gene');
|
|
551
|
|
552 @exons = map {$self->map($_)} @value;
|
|
553
|
|
554 $self->{'_in'} = ($tmp_in);
|
|
555 $self->{'_out'} = ($tmp_out);
|
|
556 $self->verbose($tmp_verb);
|
|
557 } else {
|
|
558 @exons = @value;
|
|
559 }
|
|
560
|
|
561 my $cds_map = Bio::Coordinate::Collection->new;
|
|
562 my $inex_map = Bio::Coordinate::Collection->new;
|
|
563 my $exon_map = Bio::Coordinate::Collection->new;
|
|
564 my $exon_cds_map = Bio::Coordinate::Collection->new;
|
|
565 my $intron_map = Bio::Coordinate::Collection->new;
|
|
566 my $negative_intron_map = Bio::Coordinate::Collection->new;
|
|
567
|
|
568 my $tr_end = 0;
|
|
569 my $coffset;
|
|
570 my $exon_counter;
|
|
571 my $prev_exon_end;
|
|
572
|
|
573 for my $exon ( @exons ) {
|
|
574
|
|
575 $exon_counter++;
|
|
576
|
|
577 #
|
|
578 # gene -> cds
|
|
579 #
|
|
580
|
|
581 my $match1 = Bio::Location::Simple->new
|
|
582 (-seq_id =>'gene' ,
|
|
583 -start => $exon->start,
|
|
584 -end => $exon->end, -strand=>1 );
|
|
585 my $match2 = Bio::Location::Simple->new
|
|
586 (-seq_id => 'cds',
|
|
587 -start => $tr_end + 1,
|
|
588 -end => $tr_end + $exon->end - $exon->start +1,
|
|
589 -strand=>$exon->strand );
|
|
590
|
|
591 $cds_map->add_mapper(Bio::Coordinate::Pair->new
|
|
592 (-in => $match1,
|
|
593 -out => $match2,
|
|
594 )
|
|
595 );
|
|
596
|
|
597 if ($exon->start <= 1 and $exon->end >= 1) {
|
|
598 $coffset = $tr_end - $exon->start + 1;
|
|
599 }
|
|
600 $tr_end = $tr_end + $exon->end - $exon->start + 1;
|
|
601
|
|
602 #
|
|
603 # gene -> intron
|
|
604 #
|
|
605
|
|
606 if (defined $prev_exon_end) {
|
|
607 my $match3 = Bio::Location::Simple->new
|
|
608 (-seq_id =>'gene',
|
|
609 -start => $prev_exon_end + 1,
|
|
610 -end => $exon->start -1, -strand=>$exon->strand );
|
|
611
|
|
612 my $match4 = Bio::Location::Simple->new
|
|
613 (-seq_id => 'intron'. ($exon_counter -1),
|
|
614 -start => 1,
|
|
615 -end => $exon->start - 1 - $prev_exon_end,
|
|
616 -strand=>$exon->strand );
|
|
617
|
|
618 # negative intron coordinates
|
|
619 my $match5 = Bio::Location::Simple->new
|
|
620 (-seq_id => 'intron'. ($exon_counter -1),
|
|
621 -start => -1 * ($exon->start - 2 - $prev_exon_end) -1,
|
|
622 -end => -1,
|
|
623 -strand=>$exon->strand );
|
|
624
|
|
625 $inex_map->add_mapper(Bio::Coordinate::Pair->new
|
|
626 (-in => $match3,
|
|
627 -out => $match4
|
|
628 )
|
|
629 );
|
|
630 $intron_map->add_mapper(Bio::Coordinate::Pair->new
|
|
631 (-in => $self->_clone_loc($match3),
|
|
632 -out => $self->_clone_loc($match4)
|
|
633 )
|
|
634 );
|
|
635 $negative_intron_map->add_mapper(Bio::Coordinate::Pair->new
|
|
636 (-in => $self->_clone_loc($match4),
|
|
637 -out => $match5
|
|
638 ));
|
|
639
|
|
640 }
|
|
641
|
|
642 # store the value
|
|
643 $prev_exon_end = $exon->end;
|
|
644
|
|
645 #
|
|
646 # gene -> exon
|
|
647 #
|
|
648 my $match6 = Bio::Location::Simple->new
|
|
649 (-seq_id => 'exon'. $exon_counter,
|
|
650 -start => 1,
|
|
651 -end => $exon->end - $exon->start +1,
|
|
652 -strand=> $exon->strand );
|
|
653
|
|
654 my $pair2 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match1),
|
|
655 -out => $match6
|
|
656 );
|
|
657 my $pair3 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match6),
|
|
658 -out => $self->_clone_loc($match2)
|
|
659 );
|
|
660 $inex_map->add_mapper(Bio::Coordinate::Pair->new
|
|
661 (-in => $self->_clone_loc($match1),
|
|
662 -out => $match6
|
|
663 )
|
|
664 );
|
|
665 $exon_map->add_mapper(Bio::Coordinate::Pair->new
|
|
666 (-in => $self->_clone_loc($match1),
|
|
667 -out => $self->_clone_loc($match6)
|
|
668 )
|
|
669 );
|
|
670 $exon_cds_map->add_mapper(Bio::Coordinate::Pair->new
|
|
671 (-in => $self->_clone_loc($match6),
|
|
672 -out => $self->_clone_loc($match2)
|
|
673 )
|
|
674 );
|
|
675
|
|
676 }
|
|
677
|
|
678 # move coordinate start if exons have negative values
|
|
679 if ($coffset) {
|
|
680 foreach my $m ($cds_map->each_mapper) {
|
|
681 $m->out->start($m->out->start - $coffset);
|
|
682 $m->out->end($m->out->end - $coffset);
|
|
683 }
|
|
684
|
|
685 }
|
|
686
|
|
687 $self->{'_mappers'}->{$cds_mapper} = $cds_map;
|
|
688 $self->{'_mappers'}->{$exon_cds_mapper} = $exon_cds_map;
|
|
689 $self->{'_mappers'}->{$inex_mapper} = $inex_map;
|
|
690 $self->{'_mappers'}->{$exon_mapper} = $exon_map;
|
|
691 $self->{'_mappers'}->{$intron_mapper} = $intron_map;
|
|
692 $self->{'_mappers'}->{$negative_intron_mapper} = $negative_intron_map;
|
|
693 }
|
|
694 return @{$self->{'_chr_exons'}} || 0;
|
|
695 }
|
|
696
|
|
697 =head2 _clone_loc
|
|
698
|
|
699 Title : _clone_loc
|
|
700 Usage : $copy_of_loc = $obj->_clone_loc($loc);
|
|
701 Function: Make a deep copy of a simple location
|
|
702 Returns : a Bio::Location::Simple object
|
|
703 Args : a Bio::Location::Simple object to be cloned
|
|
704
|
|
705 =cut
|
|
706
|
|
707
|
|
708 sub _clone_loc { # clone a simple location
|
|
709 my ($self,$loc) = @_;
|
|
710
|
|
711 $self->throw("I need a Bio::Location::Simple , not [". ref $loc. "]")
|
|
712 unless $loc->isa('Bio::Location::Simple');
|
|
713
|
|
714 return Bio::Location::Simple->new
|
|
715 (-seq_id => $loc->seq_id,
|
|
716 -start => $loc->start,
|
|
717 -end => $loc->end,
|
|
718 -strand=> $loc->strand,
|
|
719 -location_type => $loc->location_type
|
|
720 );
|
|
721 }
|
|
722
|
|
723
|
|
724 =head2 cds
|
|
725
|
|
726 Title : cds
|
|
727 Usage : $obj->cds(20);
|
|
728 Function: Set and read the offset of CDS from the start of transcipt
|
|
729
|
|
730 Simple input can be an integer which gives the start of the
|
|
731 coding region in genomic coordinate. If you want to provide
|
|
732 the end of the coding region or indicate the use of the
|
|
733 opposite strand, you have to pass a Bio::RangeI
|
|
734 (e.g. Bio::Location::Simple or Bio::SegFeature::Generic)
|
|
735 object to this method.
|
|
736
|
|
737 Returns : set value or 0
|
|
738 Args : new value (optional)
|
|
739
|
|
740 =cut
|
|
741
|
|
742 sub cds {
|
|
743 my ($self,$value) = @_;
|
|
744 if( defined $value) {
|
|
745 if ($value =~ /^[+-]?\d+$/ ) {
|
|
746 my $loc = Bio::Location::Simple->new(-start=>$value);
|
|
747 $self->{'_cds'} = $loc;
|
|
748 }
|
|
749 elsif (ref $value && $value->isa('Bio::RangeI') ) {
|
|
750 $self->{'_cds'} = $value;
|
|
751 } else {
|
|
752 $self->throw("I need an integer or Bio::RangeI, not [$value]")
|
|
753 }
|
|
754 # strand !!
|
|
755 my $len;
|
|
756
|
|
757 $len = $self->{'_cds'}->end - $self->{'_cds'}->start +1
|
|
758 if defined $self->{'_cds'}->end;
|
|
759
|
|
760 my $a = $self->_create_pair
|
|
761 ('chr', 'gene', 0,
|
|
762 $self->{'_cds'}->start-1,
|
|
763 $len,
|
|
764 $self->{'_cds'}->strand);
|
|
765 my $mapper = $COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
|
|
766 $self->{'_mappers'}->{$mapper} = $a;
|
|
767
|
|
768 # recalculate exon-based mappers
|
|
769 if ( defined $self->{'_chr_exons'} ) {
|
|
770 $self->exons(@{$self->{'_chr_exons'}});
|
|
771 }
|
|
772
|
|
773 }
|
|
774 return $self->{'_cds'} || 0;
|
|
775 }
|
|
776
|
|
777
|
|
778 =head2 map
|
|
779
|
|
780 Title : map
|
|
781 Usage : $newpos = $obj->map(5);
|
|
782 Function: Map the location from the input coordinate system
|
|
783 to a new value in the output coordinate system.
|
|
784 Example :
|
|
785 Returns : new value in the output coordiante system
|
|
786 Args : a Bio::Location::Simple
|
|
787
|
|
788 =cut
|
|
789
|
|
790 sub map {
|
|
791 my ($self,$value) = @_;
|
|
792 my ($res);
|
|
793
|
|
794 $self->throw("Need to pass me a Bio::Location::Simple or ".
|
|
795 "Bio::Location::Simple or Bio::SeqFeatureI, not [".
|
|
796 ref($value). "]")
|
|
797 unless ref($value) && ($value->isa('Bio::Location::Simple') or
|
|
798 $value->isa('Bio::Location::SplitLocationI') or
|
|
799 $value->isa('Bio::SeqFeatureI'));
|
|
800 $self->throw("Input coordinate system not set")
|
|
801 unless $self->{'_in'};
|
|
802 $self->throw("Output coordinate system not set")
|
|
803 unless $self->{'_out'};
|
|
804 $self->throw("Do not be silly. Input and output coordinate ".
|
|
805 "systems are the same!")
|
|
806 unless $self->{'_in'} != $self->{'_out'};
|
|
807
|
|
808 $self->_check_direction();
|
|
809
|
|
810 $value = $value->location if $value->isa('Bio::SeqFeatureI');
|
|
811 print STDERR "=== Start location: ". $value->start. ",".
|
|
812 $value->end. " (". $value->strand. ")\n" if $self->verbose > 0;
|
|
813
|
|
814
|
|
815 # if nozero coordinate system is used in the input values
|
|
816 if ( defined $self->{'_nozero'} &&
|
|
817 ( $self->{'_nozero'} == 1 || $self->{'_nozero'} == 3 ) ) {
|
|
818 $value->start($value->start + 1)
|
|
819 if defined $value->start && $value->start < 1;
|
|
820 $value->end($value->end + 1)
|
|
821 if defined $value->end && $value->end < 1;
|
|
822 }
|
|
823
|
|
824 my @steps = $self->_get_path();
|
|
825 print "mapping ", $self->{'_in'}, "->", $self->{'_out'},
|
|
826 " Mappers: ", join(", ", @steps), "\n" if $self->verbose > 0;
|
|
827
|
|
828 foreach my $mapper (@steps) {
|
|
829 if ($mapper eq $TRANSLATION) {
|
|
830 if ($self->direction == 1) {
|
|
831 $value = $self->_translate($value);
|
|
832 print STDERR "+ $TRANSLATION cds -> propeptide (translate) \n"
|
|
833 if $self->verbose > 0;
|
|
834 } else {
|
|
835 $value = $self->_reverse_translate($value);
|
|
836 print STDERR "+ $TRANSLATION propeptide -> cds (reverse translate) \n"
|
|
837 if $self->verbose > 0;
|
|
838 }
|
|
839 }
|
|
840 # keep the start and end values, and go on to next iteration
|
|
841 # if this mapper is not set
|
|
842 elsif ( ! defined $self->{'_mappers'}->{$mapper} ) {
|
|
843 # update mapper name
|
|
844 $mapper =~ /\d+-(\d+)/; my ($counter) = $1;
|
|
845 $value->seq_id($COORDINATE_INTS{$counter});
|
|
846 print STDERR "- $mapper\n" if $self->verbose > 0;
|
|
847 } else {
|
|
848
|
|
849 #
|
|
850 # the DEFAULT : generic mapping
|
|
851 #
|
|
852 $value = $self->{'_mappers'}->{$mapper}->map($value);
|
|
853 $value->purge_gaps
|
|
854 if ($value && $value->isa('Bio::Location::SplitLocationI') && $value->can('gap'));
|
|
855 print STDERR "+ $mapper (", $self->direction, "): start ",
|
|
856 $value->start, " end ", $value->end, "\n"
|
|
857 if $value && $self->verbose > 0;
|
|
858 }
|
|
859 }
|
|
860
|
|
861 # if nozero coordinate system is asked to be used in the output values
|
|
862 if ( defined $value && defined $self->{'_nozero'} &&
|
|
863 ( $self->{'_nozero'} == 2 || $self->{'_nozero'} == 3 ) ) {
|
|
864
|
|
865 $value->start($value->start - 1)
|
|
866 if defined $value->start && $value->start < 1;
|
|
867 $value->end($value->end - 1)
|
|
868 if defined $value->end && $value->end < 1;
|
|
869 }
|
|
870
|
|
871 # handle merging of adjacent split locations!
|
|
872
|
|
873 if (ref $value eq "Bio::Coordinate::Result" && $value->each_match > 1 ) {
|
|
874 my $prevloc;
|
|
875 my $merging = 0;
|
|
876 my $newvalue;
|
|
877 my @matches;
|
|
878 foreach my $loc ( $value->each_Location(1) ) {
|
|
879 unless ($prevloc) {
|
|
880 $prevloc = $loc;
|
|
881 push @matches, $prevloc;
|
|
882 next;
|
|
883 }
|
|
884 if ($prevloc->end == ($loc->start - 1) && $prevloc->seq_id eq $loc->seq_id) {
|
|
885 $prevloc->end($loc->end);
|
|
886 $merging = 1;
|
|
887 } else {
|
|
888 push @matches, $loc;
|
|
889 $prevloc = $loc;
|
|
890 }
|
|
891 }
|
|
892 if ($merging) {
|
|
893 if (@matches > 1 ) {
|
|
894 $newvalue = Bio::Coordinate::Result->new;
|
|
895 map {$newvalue->add_sub_Location} @matches;
|
|
896 } else {
|
|
897 $newvalue = Bio::Coordinate::Result::Match->new
|
|
898 (-seq_id => $matches[0]->seq_id,
|
|
899 -start => $matches[0]->start,
|
|
900 -end => $matches[0]->end,
|
|
901 -strand=> $matches[0]->strand );
|
|
902 }
|
|
903 $value = $newvalue;
|
|
904 }
|
|
905 }
|
|
906 elsif (ref $value eq "Bio::Coordinate::Result" && $value->each_match == 1 ){
|
|
907 $value = $value->match;
|
|
908 }
|
|
909
|
|
910
|
|
911 return $value;
|
|
912 }
|
|
913
|
|
914 =head2 direction
|
|
915
|
|
916 Title : direction
|
|
917 Usage : $obj->direction('peptide');
|
|
918 Function: Read-only method for the direction of mapping deduced from
|
|
919 predefined input and output coordinate names.
|
|
920 Example :
|
|
921 Returns : 1 or -1, mapping direction
|
|
922 Args : new value (optional)
|
|
923
|
|
924 =cut
|
|
925
|
|
926 sub direction {
|
|
927 my ($self) = @_;
|
|
928 return $self->{'_direction'};
|
|
929 }
|
|
930
|
|
931
|
|
932 =head2 swap
|
|
933
|
|
934 Title : swap
|
|
935 Usage : $obj->swap;
|
|
936 Function: Swap the direction of transformation
|
|
937 (input <-> output)
|
|
938 Example :
|
|
939 Returns : 1
|
|
940 Args :
|
|
941
|
|
942 =cut
|
|
943
|
|
944 sub swap {
|
|
945 my ($self,$value) = @_;
|
|
946
|
|
947 ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'});
|
|
948 map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
|
|
949
|
|
950 # record the changed direction;
|
|
951 $self->{_direction} *= -1;
|
|
952
|
|
953 return 1;
|
|
954 }
|
|
955
|
|
956
|
|
957 =head2 to_string
|
|
958
|
|
959 Title : to_string
|
|
960 Usage : $newpos = $obj->to_string(5);
|
|
961 Function: Dump the internal mapper values into a human readable format
|
|
962 Example :
|
|
963 Returns : string
|
|
964 Args :
|
|
965
|
|
966 =cut
|
|
967
|
|
968 sub to_string {
|
|
969 my ($self) = shift;
|
|
970
|
|
971 print "-" x 40, "\n";
|
|
972
|
|
973 # chr-gene
|
|
974 my $mapper_str = 'chr-gene';
|
|
975 my $mapper = $self->_mapper_string2code($mapper_str);
|
|
976
|
|
977 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
|
|
978 if (defined $self->cds) {
|
|
979 my $end = $self->cds->end -1 if defined $self->cds->end;
|
|
980 printf "%16s%s: %s (%s)\n", ' ', 'gene offset', $self->cds->start-1 , $end || '';
|
|
981 printf "%16s%s: %s\n", ' ', 'gene strand', $self->cds->strand || 0;
|
|
982 }
|
|
983
|
|
984 # gene-intron
|
|
985 $mapper_str = 'gene-intron';
|
|
986 $mapper = $self->_mapper_string2code($mapper_str);
|
|
987 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
|
|
988
|
|
989 my $i = 1;
|
|
990 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
|
|
991 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
|
|
992 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
|
|
993 $i++;
|
|
994 }
|
|
995
|
|
996 # intron-negative_intron
|
|
997 $mapper_str = 'intron-negative_intron';
|
|
998 $mapper = $self->_mapper_string2code($mapper_str);
|
|
999 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
|
|
1000
|
|
1001 $i = 1;
|
|
1002 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
|
|
1003 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
|
|
1004 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
|
|
1005 $i++;
|
|
1006 }
|
|
1007
|
|
1008
|
|
1009 # gene-exon
|
|
1010 $mapper_str = 'gene-exon';
|
|
1011 $mapper = $self->_mapper_string2code($mapper_str);
|
|
1012 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
|
|
1013
|
|
1014 $i = 1;
|
|
1015 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
|
|
1016 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
|
|
1017 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
|
|
1018 $i++;
|
|
1019 }
|
|
1020
|
|
1021
|
|
1022 # gene-cds
|
|
1023 $mapper_str = 'gene-cds';
|
|
1024 $mapper = $self->_mapper_string2code($mapper_str);
|
|
1025 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
|
|
1026
|
|
1027 $i = 1;
|
|
1028 foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
|
|
1029 printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
|
|
1030 printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
|
|
1031 $i++;
|
|
1032 }
|
|
1033
|
|
1034 # cds-propeptide
|
|
1035 $mapper_str = 'cds-propeptide';
|
|
1036 $mapper = $self->_mapper_string2code($mapper_str);
|
|
1037 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
|
|
1038 printf "%9s%-12s\n", "", '"translate"';
|
|
1039
|
|
1040
|
|
1041 # propeptide-peptide
|
|
1042 $mapper_str = 'propeptide-peptide';
|
|
1043 $mapper = $self->_mapper_string2code($mapper_str);
|
|
1044 printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
|
|
1045 printf "%16s%s: %s\n", ' ', "peptide offset", $self->peptide_offset;
|
|
1046
|
|
1047
|
|
1048
|
|
1049 print "\nin : ", $self->in, "\n";
|
|
1050 print "out: ", $self->out, "\n";
|
|
1051 my $dir;
|
|
1052 $self->direction ? ($dir='forward') : ($dir='reverse');
|
|
1053 printf "direction: %-8s(%s)\n", $dir, $self->direction;
|
|
1054 print "\n", "-" x 40, "\n";
|
|
1055
|
|
1056 1;
|
|
1057 }
|
|
1058
|
|
1059 sub _mapper_code2string {
|
|
1060 my ($self, $code) = @_;
|
|
1061 my ($a, $b) = $code =~ /(\d+)-(\d+)/;
|
|
1062 return $COORDINATE_INTS{$a}. '-'. $COORDINATE_INTS{$b};
|
|
1063
|
|
1064 }
|
|
1065
|
|
1066 sub _mapper_string2code {
|
|
1067 my ($self, $string) =@_;
|
|
1068 my ($a, $b) = $string =~ /([^-]+)-(.*)/;
|
|
1069 return $COORDINATE_SYSTEMS{$a}. '-'. $COORDINATE_SYSTEMS{$b};
|
|
1070 }
|
|
1071
|
|
1072
|
|
1073 =head2 _create_pair
|
|
1074
|
|
1075 Title : _create_pair
|
|
1076 Usage : $mapper = $obj->_create_pair('chr', 'gene', 0, 2555, 10000, -1);
|
|
1077 Function: Internal helper method to create a mapper between
|
|
1078 two coordinate systems
|
|
1079 Returns : a Bio::Coordinate::Pair object
|
|
1080 Args : string, input coordinate system name,
|
|
1081 string, output coordinate system name,
|
|
1082 boolean, strict mapping
|
|
1083 positive integer, offset
|
|
1084 positive integer, length
|
|
1085 1 || -1 , strand
|
|
1086
|
|
1087 =cut
|
|
1088
|
|
1089 sub _create_pair {
|
|
1090 my ($self, $in, $out, $strict, $offset, $length, $strand ) = @_;
|
|
1091 $strict ||= 0;
|
|
1092 $strand ||= 1;
|
|
1093 $length ||= 20;
|
|
1094
|
|
1095 my $match1 = Bio::Location::Simple->new
|
|
1096 (-seq_id => $in,
|
|
1097 -start => $offset+1,
|
|
1098 -end => $offset+$length, -strand=>1 );
|
|
1099
|
|
1100 my $match2 = Bio::Location::Simple->new
|
|
1101 (-seq_id => $out,
|
|
1102 -start => 1,
|
|
1103 -end => $length, -strand=>$strand );
|
|
1104
|
|
1105 my $pair = Bio::Coordinate::ExtrapolatingPair->new
|
|
1106 (-in => $match1,
|
|
1107 -out => $match2,
|
|
1108 -strict => $strict
|
|
1109 );
|
|
1110
|
|
1111 return $pair;
|
|
1112
|
|
1113 }
|
|
1114
|
|
1115
|
|
1116 =head2 _translate
|
|
1117
|
|
1118 Title : _translate
|
|
1119 Usage : $newpos = $obj->_translate($loc);
|
|
1120 Function: Translate the location from the CDS coordinate system
|
|
1121 to a new value in the propeptide coordinate system.
|
|
1122 Example :
|
|
1123 Returns : new location
|
|
1124 Args : a Bio::Location::Simple or Bio::Location::SplitLocationI
|
|
1125
|
|
1126 =cut
|
|
1127
|
|
1128 sub _translate {
|
|
1129 my ($self,$value) = @_;
|
|
1130
|
|
1131 $self->throw("Need to pass me a Bio::Location::Simple or ".
|
|
1132 "Bio::Location::SplitLocationI, not [". ref($value). "]")
|
|
1133 unless defined $value &&
|
|
1134 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
|
|
1135
|
|
1136 my $seqid = 'propeptide';
|
|
1137
|
|
1138 if ($value->isa("Bio::Location::SplitLocationI")) {
|
|
1139 my $split = new Bio::Location::Split(-seq_id=>$seqid);
|
|
1140 foreach my $loc ( $value->each_Location(1) ) {
|
|
1141
|
|
1142 my $match = new Bio::Location::Simple(-start => int($loc->start / 3 )+1,
|
|
1143 -end => int($loc->end / 3 )+1,
|
|
1144 -seq_id => $seqid,
|
|
1145 -strand => 1
|
|
1146 );
|
|
1147 $split->add_sub_Location($match);
|
|
1148 }
|
|
1149 return $split;
|
|
1150
|
|
1151 } else {
|
|
1152 return new Bio::Location::Simple(-start => int($value->start / 3 )+1,
|
|
1153 -end => int($value->end / 3 )+1,
|
|
1154 -seq_id => $seqid,
|
|
1155 -strand => 1
|
|
1156 );
|
|
1157 }
|
|
1158 }
|
|
1159
|
|
1160 sub _frame {
|
|
1161 my ($self,$value) = @_;
|
|
1162
|
|
1163 $self->throw("Need to pass me a Bio::Location::Simple or ".
|
|
1164 "Bio::Location::SplitLocationI, not [". ref($value). "]")
|
|
1165 unless defined $value &&
|
|
1166 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
|
|
1167
|
|
1168 my $seqid = 'propeptide';
|
|
1169
|
|
1170 if ($value->isa("Bio::Location::SplitLocationI")) {
|
|
1171 my $split = new Bio::Location::Split(-seq_id=>$seqid);
|
|
1172 foreach my $loc ( $value->each_Location(1) ) {
|
|
1173
|
|
1174 my $match = new Bio::Location::Simple(-start => ($value->start-1) % 3 +1,
|
|
1175 -end => ($value->end-1) % 3 +1,
|
|
1176 -seq_id => 'frame',
|
|
1177 -strand => 1
|
|
1178 );
|
|
1179 $split->add_sub_Location($match);
|
|
1180 }
|
|
1181 return $split;
|
|
1182 } else {
|
|
1183 return new Bio::Location::Simple(-start => ($value->start-1) % 3 +1,
|
|
1184 -end => ($value->end-1) % 3 +1,
|
|
1185 -seq_id => 'frame',
|
|
1186 -strand => 1
|
|
1187 );
|
|
1188 }
|
|
1189 }
|
|
1190
|
|
1191
|
|
1192 =head2 _reverse_translate
|
|
1193
|
|
1194 Title : _reverse_translate
|
|
1195 Usage : $newpos = $obj->_reverse_translate(5);
|
|
1196 Function: Reverse translate the location from the propeptide
|
|
1197 coordinate system to a new value in the CSD.
|
|
1198 Note that a single peptide location expands to cover
|
|
1199 the codon triplet
|
|
1200 Example :
|
|
1201 Returns : new location in the CDS coordinate system
|
|
1202 Args : a Bio::Location::Simple or Bio::Location::SplitLocationI
|
|
1203
|
|
1204 =cut
|
|
1205
|
|
1206 sub _reverse_translate {
|
|
1207 my ($self,$value) = @_;
|
|
1208
|
|
1209
|
|
1210 $self->throw("Need to pass me a Bio::Location::Simple or ".
|
|
1211 "Bio::Location::SplitLocationI, not [". ref($value). "]")
|
|
1212 unless defined $value &&
|
|
1213 ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
|
|
1214
|
|
1215 my $seqid = 'cds';
|
|
1216
|
|
1217 if ($value->isa("Bio::Location::SplitLocationI")) {
|
|
1218 my $split = new Bio::Location::Split(-seq_id=>$seqid);
|
|
1219 foreach my $loc ( $value->each_Location(1) ) {
|
|
1220
|
|
1221 my $match = new Bio::Location::Simple(-start => $value->start * 3 - 2,
|
|
1222 -end => $value->end * 3,
|
|
1223 -seq_id => $seqid,
|
|
1224 -strand => 1
|
|
1225 );
|
|
1226 $split->add_sub_Location($match);
|
|
1227 }
|
|
1228 return $split;
|
|
1229
|
|
1230 } else {
|
|
1231 return new Bio::Location::Simple(-start => $value->start * 3 - 2,
|
|
1232 -end => $value->end * 3,
|
|
1233 -seq_id => $seqid,
|
|
1234 -strand => 1
|
|
1235 );
|
|
1236 }
|
|
1237 }
|
|
1238
|
|
1239
|
|
1240 =head2 _check_direction
|
|
1241
|
|
1242 Title : _check_direction
|
|
1243 Usage : $obj->_check_direction();
|
|
1244 Function: Check and swap when needed the direction the location
|
|
1245 mapping Pairs based on input and output values
|
|
1246 Example :
|
|
1247 Returns : new location
|
|
1248 Args : a Bio::Location::Simple
|
|
1249
|
|
1250 =cut
|
|
1251
|
|
1252 sub _check_direction {
|
|
1253 my ($self) = @_;
|
|
1254
|
|
1255 my $new_direction = 1;
|
|
1256 $new_direction = -1 if $self->{'_in'} > $self->{'_out'};
|
|
1257
|
|
1258 unless ($new_direction == $self->{_direction} ) {
|
|
1259 map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
|
|
1260 # record the changed direction;
|
|
1261 $self->{_direction} *= -1;
|
|
1262 }
|
|
1263 1;
|
|
1264 }
|
|
1265
|
|
1266
|
|
1267 =head2 _get_path
|
|
1268
|
|
1269 Title : _get_path
|
|
1270 Usage : $obj->_get_path('peptide');
|
|
1271 Function: internal method for finding that shortest path between
|
|
1272 input and output coordinate systems.
|
|
1273 Calculations and caching are handled by the graph class.
|
|
1274 See L<Bio::Coordinate::Graph>.
|
|
1275 Example :
|
|
1276 Returns : array of the mappers
|
|
1277 Args : none
|
|
1278
|
|
1279 =cut
|
|
1280
|
|
1281 sub _get_path {
|
|
1282 my ($self) = @_;
|
|
1283
|
|
1284 my $start = $self->{'_in'} || 0;
|
|
1285 my $end = $self->{'_out'} || 0;
|
|
1286
|
|
1287 # note the order
|
|
1288 # always go from smaller to bigger: it makes caching more efficient
|
|
1289 my $reverse;
|
|
1290 if ($start > $end) {
|
|
1291 ($start, $end) = ($end, $start );
|
|
1292 $reverse++;
|
|
1293 }
|
|
1294
|
|
1295 my @mappers;
|
|
1296 if (exists $self->{'_previous_path'} and
|
|
1297 $self->{'_previous_path'} eq "$start$end" ) {
|
|
1298 # use cache
|
|
1299 @mappers = @{$self->{'_mapper_path'}};
|
|
1300 } else {
|
|
1301 my $mapper;
|
|
1302 my $prev_node = '';
|
|
1303 @mappers =
|
|
1304 map { $mapper = "$prev_node-$_"; $prev_node = $_; $mapper; }
|
|
1305 $self->{'_graph'}->shortest_path($start, $end);
|
|
1306 shift @mappers;
|
|
1307
|
|
1308 $self->{'_previous_path'} = "$start$end";
|
|
1309 $self->{'_mapper_path'} = \@mappers;
|
|
1310 }
|
|
1311
|
|
1312 $reverse ? return reverse @mappers : return @mappers;
|
|
1313 }
|
|
1314
|
|
1315
|
|
1316 1;
|
|
1317
|