Mercurial > repos > mahtabm > ensemb_rep_gvl
comparison variant_effect_predictor/Bio/Map/CytoPosition.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2bc9b66ada89 |
---|---|
1 # $Id: CytoPosition.pm,v 1.4 2002/10/22 07:38:35 lapp Exp $ | |
2 # | |
3 # BioPerl module for Bio::Map::CytoPosition | |
4 # | |
5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
6 # | |
7 # Copyright Heikki Lehvaslaiho | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Map::CytoPosition - Marker class with cytogenetic band storing attributes | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 $m1 = Bio::Map::CytoPosition->new ( '-id' => 'A1', | |
20 '-value' => '2q1-3' | |
21 ); | |
22 $m2 = Bio::Map::CytoPosition->new ( '-id' => 'A2', | |
23 '-value' => '2q2' | |
24 ); | |
25 | |
26 if ($m1->cytorange->overlaps($m2->cytorange)) { | |
27 print "Makers overlap\n"; | |
28 } | |
29 | |
30 | |
31 =head1 DESCRIPTION | |
32 | |
33 CytoPosition is marker (Bio::Map::MarkerI compliant) with a location in a | |
34 cytogenetic map. See L<Bio::Map::MarkerI> for more information. | |
35 | |
36 Cytogenetic locations are names of bands visible in stained mitotic | |
37 eucaryotic chromosomes. The naming follows strict rules which are | |
38 consistant at least in higher vertebates, e.g. mammals. The chromosome | |
39 name preceds the band names. | |
40 | |
41 The shorter arm of the chromosome is called 'p' ('petit') and usually | |
42 drawn pointing up. The lower arm is called 'q' ('queue'). The bands | |
43 are named from the region separting these, a centromere (cen), towards | |
44 the tips or telomeric regions (ter) counting from 1 upwards. Depending | |
45 of the resolution used the bands are identified with one or more | |
46 digit. The first digit determines the major band and subsequent digits | |
47 sub bands: p1 band can be divided into subbands p11, p12 and 13 and | |
48 p11 can furter be divided into subbands p11.1 and p11.2. The dot after | |
49 second digit makes it easier to read the values. A region between ands | |
50 is given from the centromere outwards towards the telomere (e.g. 2p2-5 | |
51 or 3p21-35) or from a band in the p arm to a band in the q arm. | |
52 | |
53 =head1 FEEDBACK | |
54 | |
55 =head2 Mailing Lists | |
56 | |
57 User feedback is an integral part of the evolution of this and other | |
58 Bioperl modules. Send your comments and suggestions preferably to the | |
59 Bioperl mailing lists Your participation is much appreciated. | |
60 | |
61 bioperl-l@bioperl.org - General discussion | |
62 http://bio.perl.org/MailList.html - About the mailing lists | |
63 | |
64 =head2 Reporting Bugs | |
65 | |
66 report bugs to the Bioperl bug tracking system to help us keep track | |
67 the bugs and their resolution. Bug reports can be submitted via | |
68 email or the web: | |
69 | |
70 bioperl-bugs@bio.perl.org | |
71 http://bugzilla.bioperl.org/ | |
72 | |
73 =head1 AUTHOR - Heikki Lehvaslaiho | |
74 | |
75 Email: heikki@ebi.ac.uk | |
76 Address: | |
77 | |
78 EMBL Outstation, European Bioinformatics Institute | |
79 Wellcome Trust Genome Campus, Hinxton | |
80 Cambs. CB10 1SD, United Kingdom | |
81 | |
82 | |
83 =head1 APPENDIX | |
84 | |
85 The rest of the documentation details each of the object | |
86 methods. Internal methods are usually preceded with a _ | |
87 | |
88 =cut | |
89 | |
90 | |
91 # Let the code begin... | |
92 | |
93 package Bio::Map::CytoPosition; | |
94 use vars qw(@ISA $VERSION); | |
95 | |
96 use strict; | |
97 use integer; | |
98 | |
99 $VERSION=1.0; | |
100 | |
101 # Object preamble - inheritance | |
102 | |
103 use Bio::Variation::VariantI; | |
104 use Bio::RangeI; | |
105 use Bio::Map::Position; | |
106 | |
107 @ISA = qw( Bio::Map::Position Bio::Variation::VariantI ); | |
108 | |
109 | |
110 =head2 cytorange | |
111 | |
112 Title : cytorange | |
113 Usage : $obj->cytorange(); | |
114 Function: | |
115 | |
116 Converts cytogenetic location set by value method into | |
117 an integer range. The chromosome number determines the | |
118 "millions" in the values. Human X and Y chromosome | |
119 symbols are represented by values 100 and 101. | |
120 | |
121 The localization within chromosomes are converted into | |
122 values between the range of 0 and 200,000: | |
123 | |
124 pter cen qter | |
125 |------------------------|-------------------------| | |
126 0 100,000 200,000 | |
127 | |
128 The values between -100,000 through 0 for centromere to | |
129 100,000 would have reflected the band numbering better but | |
130 use of positive integers was choosen since the | |
131 transformation is very easy. These values are not metric. | |
132 | |
133 Each band defines a range in a chromosome. A band string | |
134 is converted into a range by padding it with lower and and | |
135 higher end digits (for q arm: '0' and '9') to the length | |
136 of five. The arm and chromosome values are added to these: | |
137 e.g. 21000 & 21999 (band 21) + 100,000 (q arm) + 2,000,000 | |
138 (chromosome 2) => 2q21 : 2,121,000 .. 2,121,999. Note that | |
139 this notation breaks down if there is a band or a subband | |
140 using digit 9 in its name! This is not the case in human | |
141 karyotype. | |
142 | |
143 The full algorithm used for bands: | |
144 | |
145 if arm is 'q' then | |
146 pad char for start is '0', for end '9' | |
147 range is chromosome + 100,000 + padded range start or end | |
148 elsif arm is 'p' then | |
149 pad char for start is '9', for end '0' | |
150 range is chromosome + 100,000 - padded range start or end | |
151 | |
152 Example : Returns : Bio::Range object or undef | |
153 Args : none | |
154 | |
155 =cut | |
156 | |
157 | |
158 sub cytorange { | |
159 my ($self) = @_; | |
160 my ($chr, $r, $band, $band2, $arm, $arm2, $lc, $uc, $lcchar, $ucchar) = undef; | |
161 | |
162 return $r if not defined $self->value; # returns undef | |
163 $self->value =~ | |
164 # -----1----- --------2--------- -----3----- -------4------- ---6--- | |
165 m/([XY]|[0-9]+)(cen|qcen|pcen|[pq])?(ter|[.0-9]+)?-?([pq]?(cen|ter)?)?([.0-9]+)?/; | |
166 $self->warn("Not a valid value: ". $self->value), return $r | |
167 if not defined $1 ; # returns undef | |
168 | |
169 $chr = uc $1; | |
170 $self->chr($chr); | |
171 | |
172 $chr = 100 if $chr eq 'X'; | |
173 $chr = 101 if $chr eq 'Y'; | |
174 $chr *= 1000000; | |
175 | |
176 $r = new Bio::Range(); | |
177 | |
178 $band = ''; | |
179 if (defined $3 ) { | |
180 $2 || $self->throw("$& does not make sense: 'arm' or 'cen' missing"); | |
181 $band = $3; | |
182 $band =~ tr/\.//d; | |
183 } | |
184 if (defined $6 ) { | |
185 $arm2 = $4; | |
186 $arm2 = $2 if $4 eq ''; # it is not necessary to repeat the arm [p|q] | |
187 $band2 = $6; | |
188 $band2 =~ tr/\.//d; | |
189 #find the correct order | |
190 # print STDERR "-|$&|----2|$2|-----3|$band|---4|$4|--------arm2|$arm2|-------------\n"; | |
191 if ($band ne '' and (defined $arm2 and $2 ne $arm2 and $arm2 eq 'q') ) { | |
192 $lc = 'start'; $lcchar = '9'; | |
193 $uc = 'end'; $ucchar = '9'; | |
194 } | |
195 elsif ($band ne 'ter' and $2 ne $arm2 and $arm2 eq 'p') { | |
196 $lc = 'end'; $lcchar = '9'; | |
197 $uc = 'start'; $ucchar = '9'; | |
198 } | |
199 elsif ($band eq 'ter' and $arm2 eq 'p') { | |
200 $uc = 'start'; $ucchar = '9'; | |
201 } # $2 eq $arm2 | |
202 elsif ($arm2 eq 'q') { | |
203 if (_pad($band, 5, '0') < _pad($band2, 5, '0')) { | |
204 $lc = 'start'; $lcchar = '0'; | |
205 $uc = 'end'; $ucchar = '9'; | |
206 } else { | |
207 $lc = 'end'; $lcchar = '9'; | |
208 $uc = 'start'; $ucchar = '0'; | |
209 } | |
210 } | |
211 elsif ($arm2 eq 'p') { | |
212 if (_pad($band, 5, '0') < _pad($band2, 5, '0')) { | |
213 $lc = 'end'; $lcchar = '0'; | |
214 $uc = 'start'; $ucchar = '9'; | |
215 } else { | |
216 $lc = 'start'; $lcchar = '9'; | |
217 $uc = 'end'; $ucchar = '0'; | |
218 } | |
219 } | |
220 else { | |
221 $self->throw("How did you end up here? $&"); | |
222 } | |
223 | |
224 #print STDERR "-------$arm2--------$band2---------$ucchar--------------\n"; | |
225 if ( (defined $arm2 and $arm2 eq 'p') or (defined $arm2 and $arm2 eq 'p') ) { | |
226 $r->$uc(-(_pad($band2, 5, $ucchar)) + 100000 + $chr ); | |
227 if (defined $3 and $3 eq 'ter') { | |
228 $r->end(200000 + $chr); | |
229 } | |
230 elsif ($2 eq 'cen' or $2 eq 'qcen' or $2 eq 'pcen'){ | |
231 $r->$lc(100000 + $chr); | |
232 } | |
233 elsif ($2 eq 'q') { | |
234 $r->$lc(_pad($band, 5, $lcchar) + 100000 + $chr ); | |
235 } else { | |
236 $r->$lc(-(_pad($band, 5, $lcchar)) + 100000 + $chr ); | |
237 } | |
238 } else { #if:$arm2=q e.g. 9p22-q32 | |
239 #print STDERR "-------$arm2--------$band2---------$ucchar--------------\n"; | |
240 $r->$uc(_pad($band2, 5, $ucchar) + 100000 + $chr); | |
241 if ($2 eq 'cen' or $2 eq 'pcen') { | |
242 $r->$lc(100000 + $chr); | |
243 } | |
244 elsif ($2 eq 'p') { | |
245 if ($3 eq 'ter') { | |
246 $r->$lc(200000 + $chr); | |
247 } else { | |
248 $r->$lc(-(_pad($band, 5, $lcchar)) + 100000 + $chr); | |
249 } | |
250 } else { #$2.==q | |
251 $r->$lc(_pad($band, 5, $lcchar) + 100000 + $chr); | |
252 } | |
253 } | |
254 } | |
255 # | |
256 # e.g. 10p22.1-cen | |
257 # | |
258 elsif (defined $4 and $4 ne '') { | |
259 #print STDERR "$4-----$&----\n"; | |
260 if ($4 eq 'cen' || $4 eq 'qcen' || $4 eq 'pcen') { # e.g. 10p22.1-cen; | |
261 # '10pcen-qter' does not really make sense but lets have it in anyway | |
262 $r->end(100000 + $chr); | |
263 if ($2 eq 'p') { | |
264 if ($3 eq 'ter') { | |
265 $r->start($chr); | |
266 } else { | |
267 $r->start(_pad($band, 5, '9') + $chr); | |
268 } | |
269 } | |
270 elsif ($2 eq 'cen') { | |
271 $self->throw("'cen-cen' does not make sense: $&"); | |
272 } else { | |
273 $self->throw("Only order p-cen is valid: $&"); | |
274 } | |
275 } | |
276 elsif ($4 eq 'qter' || $4 eq 'ter') { # e.g. 10p22.1-qter, 1p21-qter, 10pcen-qter, 7q34-qter | |
277 $r->end(200000 + $chr); | |
278 if ($2 eq 'p'){ | |
279 $r->start(-(_pad($band, 5, '9')) + 100000 + $chr); #??? OK? | |
280 } | |
281 elsif ($2 eq 'q') { | |
282 $r->start(_pad($band, 5, '0') + 100000 + $chr); | |
283 } | |
284 elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) { | |
285 $r->start(100000 + $chr); | |
286 } | |
287 } | |
288 elsif ($4 eq 'pter' ) { | |
289 #print STDERR "$2,$3--$4-----$&----\n"; | |
290 $r->start( $chr); | |
291 if ($2 eq 'p'){ | |
292 $r->end(-(_pad($band, 5, '0')) + 100000 + $chr); | |
293 } | |
294 elsif ($2 eq 'q') { | |
295 $r->end(_pad($band, 5, '9') + 100000 + $chr); | |
296 } | |
297 elsif ($2 eq 'cen' || $2 eq 'qcen' || $2 eq 'pcen' ) { | |
298 $r->end(100000 + $chr); | |
299 } | |
300 } else { # -p or -q at the end of the range | |
301 $self->throw("lone '$4' in $& does not make sense"); | |
302 } | |
303 } | |
304 # | |
305 # e.g 10p22.1, 10pter | |
306 # | |
307 elsif (defined $3 ) { | |
308 if ($2 eq 'p') { | |
309 if ($3 eq 'ter') { # e.g. 10pter | |
310 $r = new Bio::Range('-start' => $chr, | |
311 '-end' => $chr, | |
312 ); | |
313 } else { # e.g 10p22.1 | |
314 $r = new Bio::Range('-start' => -(_pad($band, 5, '9')) + 100000 + $chr, | |
315 '-end' => -(_pad($band, 5, '0')) + 100000 + $chr, | |
316 ); | |
317 } | |
318 } elsif ($2 eq 'q') { | |
319 if ($3 eq 'ter') { # e.g. 10qter | |
320 $r = new Bio::Range('-start' => 200000 + $chr, | |
321 '-end' => 200000 + $chr, | |
322 ); | |
323 } else { # e.g 10q22.1 | |
324 $r = new Bio::Range('-start' => _pad($band, 5, '0') + 100000 + $chr, | |
325 '-end' => _pad($band, 5, '9') + 100000 + $chr, | |
326 ); | |
327 } | |
328 } else { # e.g. 10qcen1.1 ! | |
329 $self->throw("'cen' in $& does not make sense"); | |
330 } | |
331 } | |
332 # | |
333 # e.g. 10p | |
334 # | |
335 elsif (defined $2 ) { # e.g. 10p | |
336 if ($2 eq'p' ) { | |
337 $r = new Bio::Range('-start' => $chr, | |
338 '-end' => 100000 + $chr | |
339 ); | |
340 } | |
341 elsif ($2 eq'q' ) { | |
342 $r = new Bio::Range('-start' => 100000 + $chr, | |
343 '-end' => 200000 + $chr | |
344 ); | |
345 } else { # $2 eq 'cen' || 'qcen' | |
346 $r = new Bio::Range('-start' => 100000 + $chr, | |
347 '-end' => 100000 + $chr | |
348 ); | |
349 } | |
350 } | |
351 # | |
352 # chr only, e.g. X | |
353 # | |
354 else { | |
355 $r = new Bio::Range('-start' => $chr, | |
356 '-end' => 200000 + $chr | |
357 ); | |
358 } | |
359 return $r; | |
360 } | |
361 | |
362 | |
363 sub _pad { | |
364 my ($string, $len, $pad_char) = @_; | |
365 die "function _pad needs a positive integer length, not [$len]" | |
366 unless $len =~ /^\+?\d+$/; | |
367 die "function _pad needs a single character pad_char, not [$pad_char]" | |
368 unless length $pad_char == 1; | |
369 $string ||= ''; | |
370 # $padded = $text . $pad_char x ( $pad_len - length( $text ) ); | |
371 return $string . $pad_char x ( $len - length( $string ) ); | |
372 | |
373 # my $slen = length $string; | |
374 # my $add = $len - $slen; | |
375 # return $string if $add <= 0; | |
376 # return $string .= $char x $add; | |
377 } | |
378 | |
379 | |
380 =head2 range2value | |
381 | |
382 Title : range2value | |
383 Usage : $obj->range2value(); | |
384 Function: | |
385 | |
386 Sets and returns the value string based on start and end | |
387 values of the Bio::Range object passes as an argument. | |
388 | |
389 Example : | |
390 Returns : string or false | |
391 Args : Bio::Range object | |
392 | |
393 =cut | |
394 | |
395 sub range2value { | |
396 my ($self,$value) = @_; | |
397 if( defined $value) { | |
398 if( ! $value->isa('Bio::Range') ) { | |
399 $self->throw("Is not a Bio::Range object but a [$value]"); | |
400 return undef; | |
401 } | |
402 if( ! $value->start ) { | |
403 $self->throw("Start is not defined in [$value]"); | |
404 return undef; | |
405 } | |
406 if( ! $value->end ) { | |
407 $self->throw("End is not defined in [$value]"); | |
408 return undef; | |
409 } | |
410 if( $value->start < 100000 ) { | |
411 $self->throw("Start value has to be in millions, not ". $value->start); | |
412 return undef; | |
413 } | |
414 if( $value->end < 100000 ) { | |
415 $self->throw("End value has to be in millions, not ". $value->end); | |
416 return undef; | |
417 } | |
418 | |
419 my ($chr, $arm, $band) = $value->start =~ /(\d+)(\d)(\d{5})/; | |
420 my ($chr2, $arm2, $band2) = $value->end =~ /(\d+)(\d)(\d{5})/; | |
421 | |
422 #print STDERR join ("\t", $value->start, $value->end ),"\n"; | |
423 #print STDERR join ("\t", $chr, $arm, $band, $chr2, $arm2, $band2), "\n"; | |
424 | |
425 my ($chrS, $armS, $bandS, $arm2S, $band2S, $sep) = ('', '', '', '', '', '' ); | |
426 LOC: { | |
427 # | |
428 # chromosome | |
429 # | |
430 if ($chr == 100) { | |
431 $chrS = 'X'; | |
432 } | |
433 elsif ($chr == 100) { | |
434 $chrS = 'Y'; | |
435 } else { | |
436 $chrS = $chr; | |
437 } | |
438 last LOC if $arm == 0 and $arm2 == 2 and $band == 0 and $band2 == 0 ; | |
439 # | |
440 # arm | |
441 # | |
442 if ($arm == $arm2 ) { | |
443 if ($arm == 0) { | |
444 $armS = 'p'; | |
445 #$armS = 'pter' if $band == 0 and $band2 == 0; | |
446 $bandS = 'ter' if $band == 0; | |
447 #$arm2S = 'p'; #? | |
448 } | |
449 elsif ($arm == 2) { | |
450 $armS = 'q'; | |
451 $bandS = 'ter' if $band == 0; | |
452 } else { | |
453 $armS = 'q'; | |
454 #$arm2S = 'q'; #? | |
455 $armS = 'cen', if $band == 0;# and $band2 == 0; | |
456 } | |
457 } else { | |
458 if ($arm == 0) { | |
459 $armS = 'p'; | |
460 $arm2S = 'q'; | |
461 $arm2S = '' if $band == 0 and $band2 == 0; | |
462 } else { | |
463 $armS = 'q'; | |
464 $arm2S = 'p'; | |
465 $arm2S = '' if $arm2 == 2 and $band == 0 and $band2 == 0; | |
466 } | |
467 } | |
468 last LOC if $band == $band2 ; | |
469 my $c; | |
470 # | |
471 # first band (ter is hadled with the arm) | |
472 # | |
473 if ($bandS ne 'ter') { | |
474 if ($armS eq 'p') { | |
475 $band = 100000 - $band; | |
476 $c = '9'; | |
477 } else { | |
478 $c = '0'; | |
479 } | |
480 $band =~ s/$c+$//; | |
481 $bandS = $band; | |
482 $bandS = substr($band, 0, 2). '.'. substr($band, 2) if length $band > 2; | |
483 } | |
484 last LOC unless $band2; | |
485 # | |
486 # second band | |
487 # | |
488 if ($arm2 == 0) { | |
489 $arm2S = 'p'; | |
490 $band2 = 100000 - $band2; | |
491 $c = '0'; | |
492 } else { # 1 or 2 | |
493 $arm2S = 'q'; | |
494 $c = '9'; | |
495 } | |
496 if ($band2 == 0) { | |
497 if ($arm2 == 1) { | |
498 $arm2S = 'p'; | |
499 $band2S = 'cen'; | |
500 } else { | |
501 $band2S = 'ter'; | |
502 } | |
503 last LOC; | |
504 } | |
505 last LOC if $band eq $band2 and $arm == $arm2; | |
506 | |
507 $band2 =~ s/$c+$//; | |
508 $band2S = $band2; | |
509 $band2S = substr($band2, 0, 2). '.'. substr($band2, 2) if length $band2 > 2; | |
510 | |
511 } # end of LOC: | |
512 | |
513 if ($armS eq 'p' and $arm2S eq 'p') { | |
514 my $tmp = $band2S; | |
515 $band2S = $bandS; | |
516 $bandS = $tmp; | |
517 } | |
518 $band2S = '' if $bandS eq $band2S ; | |
519 $armS = '' if $bandS eq 'cen'; | |
520 $arm2S = '' if $armS eq $arm2S and $band2S ne 'ter'; | |
521 $sep = '-' if $arm2S || $band2S; | |
522 $self->value( $chrS. $armS. $bandS. $sep. $arm2S. $band2S); | |
523 } | |
524 return $self->value; | |
525 } | |
526 | |
527 =head2 value | |
528 | |
529 Title : value | |
530 Usage : my $pos = $position->value; | |
531 Function: Get/Set the value for this postion | |
532 Returns : scalar, value | |
533 Args : [optional] new value to set | |
534 | |
535 =cut | |
536 | |
537 sub value { | |
538 my ($self,$value) = @_; | |
539 if( defined $value ) { | |
540 $self->{'_value'} = $value; | |
541 $self->{'_numeric'} = $self->cytorange($value); | |
542 } | |
543 return $self->{'_value'}; | |
544 } | |
545 | |
546 =head2 numeric | |
547 | |
548 Title : numeric | |
549 Usage : my $num = $position->numeric; | |
550 Function: Read-only method that is guarantied to return a numeric | |
551 representation for this position. | |
552 | |
553 This instanse of the method can also be set, but you better | |
554 know what you are doing. | |
555 | |
556 Returns : Bio::RangeI object | |
557 Args : optional Bio::RangeI object | |
558 | |
559 See L<Bio::RangeI> for more information. | |
560 | |
561 =cut | |
562 | |
563 sub numeric { | |
564 my ($self, $value) = @_; | |
565 | |
566 if ($value) { | |
567 $self->throw("This is not a Bio::RangeI object but a [$value]") | |
568 unless $value->isa('Bio::RangeI'); | |
569 $self->{'_numeric'} = $value; | |
570 $self->{'_value'} = $self->range2value($value); | |
571 } | |
572 return $self->{'_numeric'}; | |
573 } | |
574 | |
575 | |
576 =head2 chr | |
577 | |
578 Title : chr | |
579 Usage : my $mychr = $position->chr(); | |
580 Function: Get/Set method for the chromosome string of the location. | |
581 Returns : chromosome value | |
582 Args : [optional] new chromosome value | |
583 | |
584 =cut | |
585 | |
586 sub chr { | |
587 my ($self,$chr) = @_; | |
588 if( defined $chr ) { | |
589 $self->{'_chr'} = $chr; | |
590 } | |
591 return $self->{'_chr'}; | |
592 } | |
593 | |
594 | |
595 1; |