Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Variation/DNAMutation.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: DNAMutation.pm,v 1.11 2002/10/22 07:38:49 lapp Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::Variation::DNAMutation | |
| 4 # | |
| 5 # Cared for by Heikki Lehvaslaiho <heikki@ebi.ac.uk> | |
| 6 # | |
| 7 # Copyright Heikki Lehvaslaiho | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Variation::DNAMutation - DNA level mutation class | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 $dnamut = Bio::Variation::DNAMutation->new | |
| 20 ('-start' => $start, | |
| 21 '-end' => $end, | |
| 22 '-length' => $len, | |
| 23 '-upStreamSeq' => $upflank, | |
| 24 '-dnStreamSeq' => $dnflank, | |
| 25 '-proof' => $proof, | |
| 26 '-isMutation' => 1, | |
| 27 '-mut_number' => $mut_number | |
| 28 ); | |
| 29 $a1 = Bio::Variation::Allele->new; | |
| 30 $a1->seq('a'); | |
| 31 $dnamut->allele_ori($a1); | |
| 32 my $a2 = Bio::Variation::Allele->new; | |
| 33 $a2->seq('t'); | |
| 34 $dnamut->add_Allele($a2); | |
| 35 | |
| 36 print "Restriction changes are ", $dnamut->restriction_changes, "\n"; | |
| 37 | |
| 38 # add it to a SeqDiff container object | |
| 39 $seqdiff->add_Variant($dnamut); | |
| 40 | |
| 41 | |
| 42 =head1 DESCRIPTION | |
| 43 | |
| 44 The instantiable class Bio::Variation::DNAMutation describes basic | |
| 45 sequence changes in genomic DNA level. It uses methods defined in | |
| 46 superclass Bio::Variation::VariantI. See L<Bio::Variation::VariantI> | |
| 47 for details. | |
| 48 | |
| 49 If the variation described by a DNAMutation object is transcibed, link | |
| 50 the corresponding Bio::Variation::RNAChange object to it using | |
| 51 method RNAChange(). See L<Bio::Variation::RNAChange> for more information. | |
| 52 | |
| 53 =head1 FEEDBACK | |
| 54 | |
| 55 =head2 Mailing Lists | |
| 56 | |
| 57 User feedback is an integral part of the evolution of this and other | |
| 58 Bioperl modules. Send your comments and suggestions preferably to the | |
| 59 Bioperl mailing lists Your participation is much appreciated. | |
| 60 | |
| 61 bioperl-l@bioperl.org - General discussion | |
| 62 http://bio.perl.org/MailList.html - About the mailing lists | |
| 63 | |
| 64 =head2 Reporting Bugs | |
| 65 | |
| 66 report bugs to the Bioperl bug tracking system to help us keep track | |
| 67 the bugs and their resolution. Bug reports can be submitted via | |
| 68 email or the web: | |
| 69 | |
| 70 bioperl-bugs@bio.perl.org | |
| 71 http://bugzilla.bioperl.org/ | |
| 72 | |
| 73 =head1 AUTHOR - Heikki Lehvaslaiho | |
| 74 | |
| 75 Email: heikki@ebi.ac.uk | |
| 76 Address: | |
| 77 | |
| 78 EMBL Outstation, European Bioinformatics Institute | |
| 79 Wellcome Trust Genome Campus, Hinxton | |
| 80 Cambs. CB10 1SD, United Kingdom | |
| 81 | |
| 82 =head1 APPENDIX | |
| 83 | |
| 84 The rest of the documentation details each of the object | |
| 85 methods. Internal methods are usually preceded with a _ | |
| 86 | |
| 87 =cut | |
| 88 | |
| 89 | |
| 90 # Let the code begin... | |
| 91 | |
| 92 | |
| 93 package Bio::Variation::DNAMutation; | |
| 94 my $VERSION=1.0; | |
| 95 use vars qw(@ISA); | |
| 96 use strict; | |
| 97 | |
| 98 # Object preamble - inheritance | |
| 99 use Bio::Variation::VariantI; | |
| 100 | |
| 101 @ISA = qw( Bio::Variation::VariantI ); | |
| 102 | |
| 103 sub new { | |
| 104 my($class,@args) = @_; | |
| 105 my $self = $class->SUPER::new(@args); | |
| 106 | |
| 107 my ($start, $end, $length, $strand, $primary, $source, | |
| 108 $frame, $score, $gff_string, | |
| 109 $allele_ori, $allele_mut, $upstreamseq, $dnstreamseq, | |
| 110 $label, $status, $proof, $region, $region_value, $region_dist, $numbering, | |
| 111 $cpg, $mut_number, $ismutation) = | |
| 112 $self->_rearrange([qw(START | |
| 113 END | |
| 114 LENGTH | |
| 115 STRAND | |
| 116 PRIMARY | |
| 117 SOURCE | |
| 118 FRAME | |
| 119 SCORE | |
| 120 GFF_STRING | |
| 121 ALLELE_ORI | |
| 122 ALLELE_MUT | |
| 123 UPSTREAMSEQ | |
| 124 DNSTREAMSEQ | |
| 125 LABEL | |
| 126 STATUS | |
| 127 PROOF | |
| 128 REGION | |
| 129 REGION_VALUE | |
| 130 REGION_DIST | |
| 131 NUMBERING | |
| 132 CPG | |
| 133 MUT_NUMBER | |
| 134 ISMUTATION | |
| 135 )], | |
| 136 @args); | |
| 137 | |
| 138 $self->primary_tag("Variation"); | |
| 139 | |
| 140 $self->{ 'alleles' } = []; | |
| 141 | |
| 142 $start && $self->start($start); | |
| 143 $end && $self->end($end); | |
| 144 $length && $self->length($length); | |
| 145 $strand && $self->strand($strand); | |
| 146 $primary && $self->primary_tag($primary); | |
| 147 $source && $self->source_tag($source); | |
| 148 $frame && $self->frame($frame); | |
| 149 $score && $self->score($score); | |
| 150 $gff_string && $self->_from_gff_string($gff_string); | |
| 151 | |
| 152 $allele_ori && $self->allele_ori($allele_ori); | |
| 153 $allele_mut && $self->allele_mut($allele_mut); | |
| 154 $upstreamseq && $self->upStreamSeq($upstreamseq); | |
| 155 $dnstreamseq && $self->dnStreamSeq($dnstreamseq); | |
| 156 | |
| 157 $label && $self->label($label); | |
| 158 $status && $self->status($status); | |
| 159 $proof && $self->proof($proof); | |
| 160 $region && $self->region($region); | |
| 161 $region_value && $self->region_value($region_value); | |
| 162 $region_dist && $self->region_dist($region_dist); | |
| 163 $numbering && $self->numbering($numbering); | |
| 164 $mut_number && $self->mut_number($mut_number); | |
| 165 $ismutation && $self->isMutation($ismutation); | |
| 166 | |
| 167 $cpg && $self->CpG($cpg); | |
| 168 | |
| 169 return $self; # success - we hope! | |
| 170 } | |
| 171 | |
| 172 | |
| 173 =head2 CpG | |
| 174 | |
| 175 Title : CpG | |
| 176 Usage : $obj->CpG() | |
| 177 Function: sets and returns boolean values for variation | |
| 178 hitting a CpG site. Unset value return -1. | |
| 179 Example : $obj->CpG() | |
| 180 Returns : boolean | |
| 181 Args : optional true of false value | |
| 182 | |
| 183 | |
| 184 =cut | |
| 185 | |
| 186 | |
| 187 sub CpG { | |
| 188 my ($obj,$value) = @_; | |
| 189 if( defined $value) { | |
| 190 $value ? ($value = 1) : ($value = 0); | |
| 191 $obj->{'cpg'} = $value; | |
| 192 } | |
| 193 elsif (not defined $obj->{'label'}) { | |
| 194 $obj->{'cpg'} = $obj->_CpG_value; | |
| 195 } | |
| 196 else { | |
| 197 return $obj->{'cpg'}; | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 | |
| 202 | |
| 203 sub _CpG_value { | |
| 204 my ($self) = @_; | |
| 205 if ($self->allele_ori eq $self->allele_mut and length ($self->allele_ori) == 1 ) { | |
| 206 | |
| 207 # valid only for point mutations | |
| 208 # CpG methylation-mediated deamination: | |
| 209 # CG -> TG | CG -> CA substitutions | |
| 210 # implementation here is less strict: if CpG dinucleotide was hit | |
| 211 | |
| 212 if ( ( ($self->allele_ori eq 'c') && (substr($self->upStreamSeq, 0, 1) eq 'g') ) || | |
| 213 ( ($self->allele_ori eq 'g') && (substr($self->dnStreamSeq, -1, 1) eq 'c') ) ) { | |
| 214 return 1; | |
| 215 } | |
| 216 else { | |
| 217 return 0; | |
| 218 } | |
| 219 } else { | |
| 220 $self->warn('CpG makes sense only in the context of point mutation'); | |
| 221 return undef; | |
| 222 } | |
| 223 } | |
| 224 | |
| 225 | |
| 226 =head2 RNAChange | |
| 227 | |
| 228 Title : RNAChange | |
| 229 Usage : $mutobj = $obj->RNAChange; | |
| 230 : $mutobj = $obj->RNAChange($objref); | |
| 231 Function: Returns or sets the link-reference to a mutation/change object. | |
| 232 If there is no link, it will return undef | |
| 233 Returns : an obj_ref or undef | |
| 234 | |
| 235 =cut | |
| 236 | |
| 237 | |
| 238 sub RNAChange { | |
| 239 my ($self,$value) = @_; | |
| 240 if (defined $value) { | |
| 241 if( ! $value->isa('Bio::Variation::RNAChange') ) { | |
| 242 $self->throw("Is not a Bio::Variation::RNAChange object but a [$self]"); | |
| 243 return (undef); | |
| 244 } | |
| 245 else { | |
| 246 $self->{'RNAChange'} = $value; | |
| 247 } | |
| 248 } | |
| 249 unless (exists $self->{'RNAChange'}) { | |
| 250 return (undef); | |
| 251 } else { | |
| 252 return $self->{'RNAChange'}; | |
| 253 } | |
| 254 } | |
| 255 | |
| 256 | |
| 257 =head2 label | |
| 258 | |
| 259 Title : label | |
| 260 Usage : $obj->label(); | |
| 261 Function: | |
| 262 | |
| 263 Sets and returns mutation event label(s). If value is not | |
| 264 set, or no argument is given returns false. Each | |
| 265 instantiable subclass of L<Bio::Variation::VariantI> needs | |
| 266 to implement this method. Valid values are listed in | |
| 267 'Mutation event controlled vocabulary' in | |
| 268 http://www.ebi.ac.uk/mutations/recommendations/mutevent.html. | |
| 269 | |
| 270 Example : | |
| 271 Returns : string | |
| 272 Args : string | |
| 273 | |
| 274 =cut | |
| 275 | |
| 276 | |
| 277 sub label { | |
| 278 my ($self, $value) = @_; | |
| 279 my ($o, $m, $type); | |
| 280 $o = $self->allele_ori->seq if $self->allele_ori and $self->allele_ori->seq; | |
| 281 $m = $self->allele_mut->seq if $self->allele_mut and $self->allele_mut->seq; | |
| 282 | |
| 283 if (not $o and not $m ) { | |
| 284 $self->warn("[DNAMutation, label] Both alleles should not be empty!\n"); | |
| 285 $type = 'no change'; # is this enough? | |
| 286 } | |
| 287 elsif ($o && $m && length($o) == length($m) && length($o) == 1) { | |
| 288 $type = 'point'; | |
| 289 $type .= ", ". _point_type_label($o, $m); | |
| 290 } | |
| 291 elsif (not $o ) { | |
| 292 $type = 'insertion'; | |
| 293 } | |
| 294 elsif (not $m ) { | |
| 295 $type = 'deletion'; | |
| 296 } | |
| 297 else { | |
| 298 $type = 'complex'; | |
| 299 } | |
| 300 $self->{'label'} = $type; | |
| 301 return $self->{'label'}; | |
| 302 } | |
| 303 | |
| 304 | |
| 305 sub _point_type_label { | |
| 306 my ($o, $m) = @_; | |
| 307 my ($type); | |
| 308 my %transition = ('a' => 'g', | |
| 309 'g' => 'a', | |
| 310 'c' => 't', | |
| 311 't' => 'c'); | |
| 312 $o = lc $o; | |
| 313 $m = lc $m; | |
| 314 if ($o eq $m) { | |
| 315 $type = 'no change'; | |
| 316 } | |
| 317 elsif ($transition{$o} eq $m ) { | |
| 318 $type = 'transition'; | |
| 319 } | |
| 320 else { | |
| 321 $type = 'transversion'; | |
| 322 } | |
| 323 } | |
| 324 | |
| 325 | |
| 326 =head2 sysname | |
| 327 | |
| 328 Title : sysname | |
| 329 Usage : $self->sysname | |
| 330 Function: | |
| 331 | |
| 332 This subroutine creates a string corresponding to the | |
| 333 'systematic name' of the mutation. Systematic name is | |
| 334 specified in Antonorakis & MDI Nomenclature Working Group: | |
| 335 Human Mutation 11:1-3, 1998. | |
| 336 http://www.interscience.wiley.com/jpages/1059-7794/nomenclature.html | |
| 337 Returns : string | |
| 338 | |
| 339 =cut | |
| 340 | |
| 341 | |
| 342 sub sysname { | |
| 343 my ($self,$value) = @_; | |
| 344 if( defined $value) { | |
| 345 $self->{'sysname'} = $value; | |
| 346 } else { | |
| 347 $self->warn('Mutation start position is not defined') | |
| 348 if not defined $self->start; | |
| 349 my $sysname = ''; | |
| 350 # show the alphabet only if $self->SeqDiff->alphabet is set; | |
| 351 my $mol = ''; | |
| 352 if ($self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'dna') { | |
| 353 $mol = 'g.'; | |
| 354 } | |
| 355 elsif ($self->SeqDiff->alphabet && $self->SeqDiff->alphabet eq 'rna') { | |
| 356 $mol = 'c.'; | |
| 357 } | |
| 358 my $sep; | |
| 359 if ($self->isMutation) { | |
| 360 $sep = '>'; | |
| 361 } else { | |
| 362 $sep = '|'; | |
| 363 } | |
| 364 my $sign = '+'; | |
| 365 $sign = '' if $self->start < 1; | |
| 366 $sysname .= $mol ;#if $mol; | |
| 367 $sysname .= $sign. $self->start; | |
| 368 | |
| 369 my @alleles = $self->each_Allele; | |
| 370 $self->allele_mut($alleles[0]); | |
| 371 | |
| 372 $sysname .= 'del' if $self->label =~ /deletion/; | |
| 373 $sysname .= 'ins' if $self->label =~ /insertion/; | |
| 374 $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq; | |
| 375 | |
| 376 | |
| 377 | |
| 378 #push @alleles, $self->allele_mut if $self->allele_mut; | |
| 379 foreach my $allele (@alleles) { | |
| 380 $self->allele_mut($allele); | |
| 381 $sysname .= $sep if $self->label =~ /point/ or $self->label =~ /complex/; | |
| 382 $sysname .= uc $self->allele_mut->seq if $self->allele_mut->seq; | |
| 383 } | |
| 384 $self->{'sysname'} = $sysname; | |
| 385 #$self->{'sysname'} = $sign. $self->start. | |
| 386 # uc $self->allele_ori->seq. $sep. uc $self->allele_mut->seq; | |
| 387 } | |
| 388 return $self->{'sysname'}; | |
| 389 } | |
| 390 | |
| 391 1; |
