comparison variant_effect_predictor/Bio/Map/CytoPosition.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: CytoPosition.pm,v 1.4 2002/10/22 07:38:35 lapp Exp $
2 #
3 # BioPerl module for Bio::Map::CytoPosition
4 #
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk>
6 #
7 # Copyright Heikki Lehvaslaiho
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Map::CytoPosition - Marker class with cytogenetic band storing attributes
16
17 =head1 SYNOPSIS
18
19 $m1 = Bio::Map::CytoPosition->new ( '-id' => 'A1',
20 '-value' => '2q1-3'
21 );
22 $m2 = Bio::Map::CytoPosition->new ( '-id' => 'A2',
23 '-value' => '2q2'
24 );
25
26 if ($m1->cytorange->overlaps($m2->cytorange)) {
27 print "Makers overlap\n";
28 }
29
30
31 =head1 DESCRIPTION
32
33 CytoPosition is marker (Bio::Map::MarkerI compliant) with a location in a
34 cytogenetic map. See L<Bio::Map::MarkerI> for more information.
35
36 Cytogenetic locations are names of bands visible in stained mitotic
37 eucaryotic chromosomes. The naming follows strict rules which are
38 consistant at least in higher vertebates, e.g. mammals. The chromosome
39 name preceds the band names.
40
41 The shorter arm of the chromosome is called 'p' ('petit') and usually
42 drawn pointing up. The lower arm is called 'q' ('queue'). The bands
43 are named from the region separting these, a centromere (cen), towards
44 the tips or telomeric regions (ter) counting from 1 upwards. Depending
45 of the resolution used the bands are identified with one or more
46 digit. The first digit determines the major band and subsequent digits
47 sub bands: p1 band can be divided into subbands p11, p12 and 13 and
48 p11 can furter be divided into subbands p11.1 and p11.2. The dot after
49 second digit makes it easier to read the values. A region between ands
50 is given from the centromere outwards towards the telomere (e.g. 2p2-5
51 or 3p21-35) or from a band in the p arm to a band in the q arm.
52
53 =head1 FEEDBACK
54
55 =head2 Mailing Lists
56
57 User feedback is an integral part of the evolution of this and other
58 Bioperl modules. Send your comments and suggestions preferably to the
59 Bioperl mailing lists Your participation is much appreciated.
60
61 bioperl-l@bioperl.org - General discussion
62 http://bio.perl.org/MailList.html - About the mailing lists
63
64 =head2 Reporting Bugs
65
66 report bugs to the Bioperl bug tracking system to help us keep track
67 the bugs and their resolution. Bug reports can be submitted via
68 email or the web:
69
70 bioperl-bugs@bio.perl.org
71 http://bugzilla.bioperl.org/
72
73 =head1 AUTHOR - Heikki Lehvaslaiho
74
75 Email: heikki@ebi.ac.uk
76 Address:
77
78 EMBL Outstation, European Bioinformatics Institute
79 Wellcome Trust Genome Campus, Hinxton
80 Cambs. CB10 1SD, United Kingdom
81
82
83 =head1 APPENDIX
84
85 The rest of the documentation details each of the object
86 methods. Internal methods are usually preceded with a _
87
88 =cut
89
90
91 # Let the code begin...
92
93 package Bio::Map::CytoPosition;
94 use vars qw(@ISA $VERSION);
95
96 use strict;
97 use integer;
98
99 $VERSION=1.0;
100
101 # Object preamble - inheritance
102
103 use Bio::Variation::VariantI;
104 use Bio::RangeI;
105 use Bio::Map::Position;
106
107 @ISA = qw( Bio::Map::Position Bio::Variation::VariantI );
108
109
110 =head2 cytorange
111
112 Title : cytorange
113 Usage : $obj->cytorange();
114 Function:
115
116 Converts cytogenetic location set by value method into
117 an integer range. The chromosome number determines the
118 "millions" in the values. Human X and Y chromosome
119 symbols are represented by values 100 and 101.
120
121 The localization within chromosomes are converted into
122 values between the range of 0 and 200,000:
123
124 pter cen qter
125 |------------------------|-------------------------|
126 0 100,000 200,000
127
128 The values between -100,000 through 0 for centromere to
129 100,000 would have reflected the band numbering better but
130 use of positive integers was choosen since the
131 transformation is very easy. These values are not metric.
132
133 Each band defines a range in a chromosome. A band string
134 is converted into a range by padding it with lower and and
135 higher end digits (for q arm: '0' and '9') to the length
136 of five. The arm and chromosome values are added to these:
137 e.g. 21000 & 21999 (band 21) + 100,000 (q arm) + 2,000,000
138 (chromosome 2) => 2q21 : 2,121,000 .. 2,121,999. Note that
139 this notation breaks down if there is a band or a subband
140 using digit 9 in its name! This is not the case in human
141 karyotype.
142
143 The full algorithm used for bands:
144
145 if arm is 'q' then
146 pad char for start is '0', for end '9'
147 range is chromosome + 100,000 + padded range start or end
148 elsif arm is 'p' then
149 pad char for start is '9', for end '0'
150 range is chromosome + 100,000 - padded range start or end
151
152 Example : Returns : Bio::Range object or undef
153 Args : none
154
155 =cut
156
157
158 sub cytorange {
159 my ($self) = @_;
160 my ($chr, $r, $band, $band2, $arm, $arm2, $lc, $uc, $lcchar, $ucchar) = undef;
161
162 return $r if not defined $self->value; # returns undef
163 $self->value =~
164 # -----1----- --------2--------- -----3----- -------4------- ---6---
165 m/([XY]|[0-9]+)(cen|qcen|pcen|[pq])?(ter|[.0-9]+)?-?([pq]?(cen|ter)?)?([.0-9]+)?/;
166 $self->warn("Not a valid value: ". $self->value), return $r
167 if not defined $1 ; # returns undef
168
169 $chr = uc $1;
170 $self->chr($chr);
171
172 $chr = 100 if $chr eq 'X';
173 $chr = 101 if $chr eq 'Y';
174 $chr *= 1000000;
175
176 $r = new Bio::Range();
177
178 $band = '';
179 if (defined $3 ) {
180 $2 || $self->throw("$& does not make sense: 'arm' or 'cen' missing");
181 $band = $3;
182 $band =~ tr/\.//d;
183 }
184 if (defined $6 ) {
185 $arm2 = $4;
186 $arm2 = $2 if $4 eq ''; # it is not necessary to repeat the arm [p|q]
187 $band2 = $6;
188 $band2 =~ tr/\.//d;
189 #find the correct order
190 # print STDERR "-|$&|----2|$2|-----3|$band|---4|$4|--------arm2|$arm2|-------------\n";
191 if ($band ne '' and (defined $arm2 and $2 ne $arm2 and $arm2 eq 'q') ) {
192 $lc = 'start'; $lcchar = '9';
193 $uc = 'end'; $ucchar = '9';
194 }
195 elsif ($band ne 'ter' and $2 ne $arm2 and $arm2 eq 'p') {
196 $lc = 'end'; $lcchar = '9';
197 $uc = 'start'; $ucchar = '9';
198 }
199 elsif ($band eq 'ter' and $arm2 eq 'p') {
200 $uc = 'start'; $ucchar = '9';
201 } # $2 eq $arm2
202 elsif ($arm2 eq 'q') {
203 if (_pad($band, 5, '0') < _pad($band2, 5, '0')) {
204 $lc = 'start'; $lcchar = '0';
205 $uc = 'end'; $ucchar = '9';
206 } else {
207 $lc = 'end'; $lcchar = '9';
208 $uc = 'start'; $ucchar = '0';
209 }
210 }
211 elsif ($arm2 eq 'p') {
212 if (_pad($band, 5, '0') < _pad($band2, 5, '0')) {
213 $lc = 'end'; $lcchar = '0';
214 $uc = 'start'; $ucchar = '9';
215 } else {
216 $lc = 'start'; $lcchar = '9';
217 $uc = 'end'; $ucchar = '0';
218 }
219 }
220 else {
221 $self->throw("How did you end up here? $&");
222 }
223
224 #print STDERR "-------$arm2--------$band2---------$ucchar--------------\n";
225 if ( (defined $arm2 and $arm2 eq 'p') or (defined $arm2 and $arm2 eq 'p') ) {
226 $r->$uc(-(_pad($band2, 5, $ucchar)) + 100000 + $chr );
227 if (defined $3 and $3 eq 'ter') {
228 $r->end(200000 + $chr);
229 }
230 elsif ($2 eq 'cen' or $2 eq 'qcen' or $2 eq 'pcen'){
231 $r->$lc(100000 + $chr);
232 }
233 elsif ($2 eq 'q') {
234 $r->$lc(_pad($band, 5, $lcchar) + 100000 + $chr );
235 } else {
236 $r->$lc(-(_pad($band, 5, $lcchar)) + 100000 + $chr );
237 }
238 } else { #if:$arm2=q e.g. 9p22-q32
239 #print STDERR "-------$arm2--------$band2---------$ucchar--------------\n";
240 $r->$uc(_pad($band2, 5, $ucchar) + 100000 + $chr);
241 if ($2 eq 'cen' or $2 eq 'pcen') {
242 $r->$lc(100000 + $chr);
243 }
244 elsif ($2 eq 'p') {
245 if ($3 eq 'ter') {
246 $r->$lc(200000 + $chr);
247 } else {
248 $r->$lc(-(_pad($band, 5, $lcchar)) + 100000 + $chr);
249 }
250 } else { #$2.==q
251 $r->$lc(_pad($band, 5, $lcchar) + 100000 + $chr);
252 }
253 }
254 }
255 #
256 # e.g. 10p22.1-cen
257 #
258 elsif (defined $4 and $4 ne '') {
259 #print STDERR "$4-----$&----\n";
260 if ($4 eq 'cen' || $4 eq 'qcen' || $4 eq 'pcen') { # e.g. 10p22.1-cen;
261 # '10pcen-qter' does not really make sense but lets have it in anyway
262 $r->end(100000 + $chr);
263 if ($2 eq 'p') {
264 if ($3 eq 'ter') {
265 $r->start($chr);
266 } else {
267 $r->start(_pad($band, 5, '9') + $chr);
268 }
269 }
270 elsif ($2 eq 'cen') {
271 $self->throw("'cen-cen' does not make sense: $&");
272 } else {
273 $self->throw("Only order p-cen is valid: $&");
274 }
275 }
276 elsif ($4 eq 'qter' || $4 eq 'ter') { # e.g. 10p22.1-qter, 1p21-qter, 10pcen-qter, 7q34-qter
277 $r->end(200000 + $chr);
278 if ($2 eq 'p'){
279 $r->start(-(_pad($band, 5, '9')) + 100000 + $chr); #??? OK?
280 }
281 elsif ($2 eq 'q') {
282 $r->start(_pad($band, 5, '0') + 100000 + $chr);
283 }
284 elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) {
285 $r->start(100000 + $chr);
286 }
287 }
288 elsif ($4 eq 'pter' ) {
289 #print STDERR "$2,$3--$4-----$&----\n";
290 $r->start( $chr);
291 if ($2 eq 'p'){
292 $r->end(-(_pad($band, 5, '0')) + 100000 + $chr);
293 }
294 elsif ($2 eq 'q') {
295 $r->end(_pad($band, 5, '9') + 100000 + $chr);
296 }
297 elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) {
298 $r->end(100000 + $chr);
299 }
300 } else { # -p or -q at the end of the range
301 $self->throw("lone '$4' in $& does not make sense");
302 }
303 }
304 #
305 # e.g 10p22.1, 10pter
306 #
307 elsif (defined $3 ) {
308 if ($2 eq 'p') {
309 if ($3 eq 'ter') { # e.g. 10pter
310 $r = new Bio::Range('-start' => $chr,
311 '-end' => $chr,
312 );
313 } else { # e.g 10p22.1
314 $r = new Bio::Range('-start' => -(_pad($band, 5, '9')) + 100000 + $chr,
315 '-end' => -(_pad($band, 5, '0')) + 100000 + $chr,
316 );
317 }
318 } elsif ($2 eq 'q') {
319 if ($3 eq 'ter') { # e.g. 10qter
320 $r = new Bio::Range('-start' => 200000 + $chr,
321 '-end' => 200000 + $chr,
322 );
323 } else { # e.g 10q22.1
324 $r = new Bio::Range('-start' => _pad($band, 5, '0') + 100000 + $chr,
325 '-end' => _pad($band, 5, '9') + 100000 + $chr,
326 );
327 }
328 } else { # e.g. 10qcen1.1 !
329 $self->throw("'cen' in $& does not make sense");
330 }
331 }
332 #
333 # e.g. 10p
334 #
335 elsif (defined $2 ) { # e.g. 10p
336 if ($2 eq'p' ) {
337 $r = new Bio::Range('-start' => $chr,
338 '-end' => 100000 + $chr
339 );
340 }
341 elsif ($2 eq'q' ) {
342 $r = new Bio::Range('-start' => 100000 + $chr,
343 '-end' => 200000 + $chr
344 );
345 } else { # $2 eq 'cen' || 'qcen'
346 $r = new Bio::Range('-start' => 100000 + $chr,
347 '-end' => 100000 + $chr
348 );
349 }
350 }
351 #
352 # chr only, e.g. X
353 #
354 else {
355 $r = new Bio::Range('-start' => $chr,
356 '-end' => 200000 + $chr
357 );
358 }
359 return $r;
360 }
361
362
363 sub _pad {
364 my ($string, $len, $pad_char) = @_;
365 die "function _pad needs a positive integer length, not [$len]"
366 unless $len =~ /^\+?\d+$/;
367 die "function _pad needs a single character pad_char, not [$pad_char]"
368 unless length $pad_char == 1;
369 $string ||= '';
370 # $padded = $text . $pad_char x ( $pad_len - length( $text ) );
371 return $string . $pad_char x ( $len - length( $string ) );
372
373 # my $slen = length $string;
374 # my $add = $len - $slen;
375 # return $string if $add <= 0;
376 # return $string .= $char x $add;
377 }
378
379
380 =head2 range2value
381
382 Title : range2value
383 Usage : $obj->range2value();
384 Function:
385
386 Sets and returns the value string based on start and end
387 values of the Bio::Range object passes as an argument.
388
389 Example :
390 Returns : string or false
391 Args : Bio::Range object
392
393 =cut
394
395 sub range2value {
396 my ($self,$value) = @_;
397 if( defined $value) {
398 if( ! $value->isa('Bio::Range') ) {
399 $self->throw("Is not a Bio::Range object but a [$value]");
400 return undef;
401 }
402 if( ! $value->start ) {
403 $self->throw("Start is not defined in [$value]");
404 return undef;
405 }
406 if( ! $value->end ) {
407 $self->throw("End is not defined in [$value]");
408 return undef;
409 }
410 if( $value->start < 100000 ) {
411 $self->throw("Start value has to be in millions, not ". $value->start);
412 return undef;
413 }
414 if( $value->end < 100000 ) {
415 $self->throw("End value has to be in millions, not ". $value->end);
416 return undef;
417 }
418
419 my ($chr, $arm, $band) = $value->start =~ /(\d+)(\d)(\d{5})/;
420 my ($chr2, $arm2, $band2) = $value->end =~ /(\d+)(\d)(\d{5})/;
421
422 #print STDERR join ("\t", $value->start, $value->end ),"\n";
423 #print STDERR join ("\t", $chr, $arm, $band, $chr2, $arm2, $band2), "\n";
424
425 my ($chrS, $armS, $bandS, $arm2S, $band2S, $sep) = ('', '', '', '', '', '' );
426 LOC: {
427 #
428 # chromosome
429 #
430 if ($chr == 100) {
431 $chrS = 'X';
432 }
433 elsif ($chr == 100) {
434 $chrS = 'Y';
435 } else {
436 $chrS = $chr;
437 }
438 last LOC if $arm == 0 and $arm2 == 2 and $band == 0 and $band2 == 0 ;
439 #
440 # arm
441 #
442 if ($arm == $arm2 ) {
443 if ($arm == 0) {
444 $armS = 'p';
445 #$armS = 'pter' if $band == 0 and $band2 == 0;
446 $bandS = 'ter' if $band == 0;
447 #$arm2S = 'p'; #?
448 }
449 elsif ($arm == 2) {
450 $armS = 'q';
451 $bandS = 'ter' if $band == 0;
452 } else {
453 $armS = 'q';
454 #$arm2S = 'q'; #?
455 $armS = 'cen', if $band == 0;# and $band2 == 0;
456 }
457 } else {
458 if ($arm == 0) {
459 $armS = 'p';
460 $arm2S = 'q';
461 $arm2S = '' if $band == 0 and $band2 == 0;
462 } else {
463 $armS = 'q';
464 $arm2S = 'p';
465 $arm2S = '' if $arm2 == 2 and $band == 0 and $band2 == 0;
466 }
467 }
468 last LOC if $band == $band2 ;
469 my $c;
470 #
471 # first band (ter is hadled with the arm)
472 #
473 if ($bandS ne 'ter') {
474 if ($armS eq 'p') {
475 $band = 100000 - $band;
476 $c = '9';
477 } else {
478 $c = '0';
479 }
480 $band =~ s/$c+$//;
481 $bandS = $band;
482 $bandS = substr($band, 0, 2). '.'. substr($band, 2) if length $band > 2;
483 }
484 last LOC unless $band2;
485 #
486 # second band
487 #
488 if ($arm2 == 0) {
489 $arm2S = 'p';
490 $band2 = 100000 - $band2;
491 $c = '0';
492 } else { # 1 or 2
493 $arm2S = 'q';
494 $c = '9';
495 }
496 if ($band2 == 0) {
497 if ($arm2 == 1) {
498 $arm2S = 'p';
499 $band2S = 'cen';
500 } else {
501 $band2S = 'ter';
502 }
503 last LOC;
504 }
505 last LOC if $band eq $band2 and $arm == $arm2;
506
507 $band2 =~ s/$c+$//;
508 $band2S = $band2;
509 $band2S = substr($band2, 0, 2). '.'. substr($band2, 2) if length $band2 > 2;
510
511 } # end of LOC:
512
513 if ($armS eq 'p' and $arm2S eq 'p') {
514 my $tmp = $band2S;
515 $band2S = $bandS;
516 $bandS = $tmp;
517 }
518 $band2S = '' if $bandS eq $band2S ;
519 $armS = '' if $bandS eq 'cen';
520 $arm2S = '' if $armS eq $arm2S and $band2S ne 'ter';
521 $sep = '-' if $arm2S || $band2S;
522 $self->value( $chrS. $armS. $bandS. $sep. $arm2S. $band2S);
523 }
524 return $self->value;
525 }
526
527 =head2 value
528
529 Title : value
530 Usage : my $pos = $position->value;
531 Function: Get/Set the value for this postion
532 Returns : scalar, value
533 Args : [optional] new value to set
534
535 =cut
536
537 sub value {
538 my ($self,$value) = @_;
539 if( defined $value ) {
540 $self->{'_value'} = $value;
541 $self->{'_numeric'} = $self->cytorange($value);
542 }
543 return $self->{'_value'};
544 }
545
546 =head2 numeric
547
548 Title : numeric
549 Usage : my $num = $position->numeric;
550 Function: Read-only method that is guarantied to return a numeric
551 representation for this position.
552
553 This instanse of the method can also be set, but you better
554 know what you are doing.
555
556 Returns : Bio::RangeI object
557 Args : optional Bio::RangeI object
558
559 See L<Bio::RangeI> for more information.
560
561 =cut
562
563 sub numeric {
564 my ($self, $value) = @_;
565
566 if ($value) {
567 $self->throw("This is not a Bio::RangeI object but a [$value]")
568 unless $value->isa('Bio::RangeI');
569 $self->{'_numeric'} = $value;
570 $self->{'_value'} = $self->range2value($value);
571 }
572 return $self->{'_numeric'};
573 }
574
575
576 =head2 chr
577
578 Title : chr
579 Usage : my $mychr = $position->chr();
580 Function: Get/Set method for the chromosome string of the location.
581 Returns : chromosome value
582 Args : [optional] new chromosome value
583
584 =cut
585
586 sub chr {
587 my ($self,$chr) = @_;
588 if( defined $chr ) {
589 $self->{'_chr'} = $chr;
590 }
591 return $self->{'_chr'};
592 }
593
594
595 1;