Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Seq/PrimaryQual.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: PrimaryQual.pm,v 1.17 2002/10/22 07:38:40 lapp Exp $ | |
| 2 # | |
| 3 # bioperl module for Bio::PrimaryQual | |
| 4 # | |
| 5 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com> | |
| 6 # | |
| 7 # Copyright Chad Matsalla | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 use Bio::Seq::PrimaryQual; | |
| 20 | |
| 21 # you can use either a space-delimited string for quality | |
| 22 | |
| 23 my $string_quals = "10 20 30 40 50 40 30 20 10"; | |
| 24 my $qualobj = Bio::Seq::PrimaryQual->new | |
| 25 ( '-qual' => $string_quals, | |
| 26 '-id' => 'QualityFragment-12', | |
| 27 '-accession_number' => 'X78121', | |
| 28 ); | |
| 29 | |
| 30 # _or_ you can use an array of quality values | |
| 31 | |
| 32 my @q2 = split/ /,$string_quals; | |
| 33 $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2, | |
| 34 '-primary_id' => 'chads primary_id', | |
| 35 '-desc' => 'chads desc', | |
| 36 '-accession_number' => 'chads accession_number', | |
| 37 '-id' => 'chads id' | |
| 38 ); | |
| 39 | |
| 40 # to get the quality values out: | |
| 41 | |
| 42 my @quals = @{$qualobj->qual()}; | |
| 43 | |
| 44 # to give _new_ quality values | |
| 45 | |
| 46 my $newqualstring = "50 90 1000 20 12 0 0"; | |
| 47 $qualobj->qual($newqualstring); | |
| 48 | |
| 49 | |
| 50 =head1 DESCRIPTION | |
| 51 | |
| 52 This module provides a mechanism for storing quality | |
| 53 values. Much more useful as part of | |
| 54 Bio::Seq::SeqWithQuality where these quality values | |
| 55 are associated with the sequence information. | |
| 56 | |
| 57 =head1 FEEDBACK | |
| 58 | |
| 59 =head2 Mailing Lists | |
| 60 | |
| 61 User feedback is an integral part of the evolution of this and other | |
| 62 Bioperl modules. Send your comments and suggestions preferably to one | |
| 63 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 64 | |
| 65 bioperl-l@bioperl.org - General discussion | |
| 66 http://bio.perl.org/MailList.html - About the mailing lists | |
| 67 | |
| 68 =head2 Reporting Bugs | |
| 69 | |
| 70 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 71 the bugs and their resolution. Bug reports can be submitted via email | |
| 72 or the web: | |
| 73 | |
| 74 bioperl-bugs@bio.perl.org | |
| 75 http://bugzilla.bioperl.org/ | |
| 76 | |
| 77 =head1 AUTHOR - Chad Matsalla | |
| 78 | |
| 79 Email bioinformatics@dieselwurks.com | |
| 80 | |
| 81 =head1 APPENDIX | |
| 82 | |
| 83 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
| 84 | |
| 85 =cut | |
| 86 | |
| 87 | |
| 88 # Let the code begin... | |
| 89 | |
| 90 package Bio::Seq::PrimaryQual; | |
| 91 use vars qw(@ISA %valid_type); | |
| 92 use strict; | |
| 93 | |
| 94 use Bio::Root::Root; | |
| 95 use Bio::Seq::QualI; | |
| 96 | |
| 97 @ISA = qw(Bio::Root::Root Bio::Seq::QualI); | |
| 98 | |
| 99 | |
| 100 =head2 new() | |
| 101 | |
| 102 Title : new() | |
| 103 Usage : $qual = Bio::Seq::PrimaryQual->new | |
| 104 ( -qual => '10 20 30 40 50 50 20 10', | |
| 105 -id => 'human_id', | |
| 106 -accession_number => 'AL000012', | |
| 107 ); | |
| 108 | |
| 109 Function: Returns a new Bio::Seq::PrimaryQual object from basic | |
| 110 constructors, being a string _or_ a reference to an array for the | |
| 111 sequence and strings for id and accession_number. Note that you | |
| 112 can provide an empty quality string. | |
| 113 Returns : a new Bio::Seq::PrimaryQual object | |
| 114 | |
| 115 =cut | |
| 116 | |
| 117 | |
| 118 | |
| 119 | |
| 120 sub new { | |
| 121 my ($class, @args) = @_; | |
| 122 my $self = $class->SUPER::new(@args); | |
| 123 | |
| 124 # default: turn ON the warnings (duh) | |
| 125 my($qual,$id,$acc,$pid,$desc,$given_id) = | |
| 126 $self->_rearrange([qw(QUAL | |
| 127 DISPLAY_ID | |
| 128 ACCESSION_NUMBER | |
| 129 PRIMARY_ID | |
| 130 DESC | |
| 131 ID | |
| 132 )], | |
| 133 @args); | |
| 134 if( defined $id && defined $given_id ) { | |
| 135 if( $id ne $given_id ) { | |
| 136 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]"); | |
| 137 } | |
| 138 } | |
| 139 if( defined $given_id ) { $id = $given_id; } | |
| 140 | |
| 141 # note: the sequence string may be empty | |
| 142 $self->qual($qual ? $qual : []); | |
| 143 $id && $self->display_id($id); | |
| 144 $acc && $self->accession_number($acc); | |
| 145 $pid && $self->primary_id($pid); | |
| 146 $desc && $self->desc($desc); | |
| 147 | |
| 148 return $self; | |
| 149 } | |
| 150 | |
| 151 =head2 qual() | |
| 152 | |
| 153 Title : qual() | |
| 154 Usage : @quality_values = @{$obj->qual()}; | |
| 155 Function: Returns the quality as a reference to an array containing the | |
| 156 quality values. The individual elements of the quality array are | |
| 157 not validated and can be any numeric value. | |
| 158 Returns : A reference to an array. | |
| 159 | |
| 160 =cut | |
| 161 | |
| 162 sub qual { | |
| 163 my ($self,$value) = @_; | |
| 164 | |
| 165 if( ! defined $value || length($value) == 0 ) { | |
| 166 $self->{'qual'} ||= []; | |
| 167 } elsif( ref($value) =~ /ARRAY/i ) { | |
| 168 # if the user passed in a reference to an array | |
| 169 $self->{'qual'} = $value; | |
| 170 } elsif(! $self->validate_qual($value)){ | |
| 171 $self->throw("Attempting to set the quality to [$value] which does not look healthy"); | |
| 172 } else { | |
| 173 $self->{'qual'} = [split(/\s+/,$value)]; | |
| 174 } | |
| 175 | |
| 176 return $self->{'qual'}; | |
| 177 } | |
| 178 | |
| 179 =head2 validate_qual($qualstring) | |
| 180 | |
| 181 Title : validate_qual($qualstring) | |
| 182 Usage : print("Valid.") if { &validate_qual($self,$qualities); } | |
| 183 Function: Make sure that the quality, if it has length > 0, contains at | |
| 184 least one digit. Note that quality strings are parsed into arrays | |
| 185 using split/\d+/,$quality_string, so make sure that your quality | |
| 186 scalar looks like this if you want it to be parsed properly. | |
| 187 Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>) | |
| 188 Args : a scalar (any scalar, why PrimarySeq author?) and a scalar | |
| 189 containing the string to validate. | |
| 190 | |
| 191 =cut | |
| 192 | |
| 193 sub validate_qual { | |
| 194 # how do I validate quality values? | |
| 195 # \d+\s+\d+..., I suppose | |
| 196 my ($self,$qualstr) = @_; | |
| 197 # why the CORE?? -- (Because Bio::PrimarySeqI namespace has a | |
| 198 # length method, you have to qualify | |
| 199 # which length to use) | |
| 200 return 0 if (!defined $qualstr || CORE::length($qualstr) <= 0); | |
| 201 return 1 if( $qualstr =~ /\d/); | |
| 202 | |
| 203 return 0; | |
| 204 } | |
| 205 | |
| 206 =head2 subqual($start,$end) | |
| 207 | |
| 208 Title : subqual($start,$end) | |
| 209 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)}; | |
| 210 Function: returns the quality values from $start to $end, where the | |
| 211 first value is 1 and the number is inclusive, ie 1-2 are the | |
| 212 first two bases of the sequence. Start cannot be larger than | |
| 213 end but can be equal. | |
| 214 Returns : A reference to an array. | |
| 215 Args : a start position and an end position | |
| 216 | |
| 217 =cut | |
| 218 | |
| 219 | |
| 220 sub subqual { | |
| 221 my ($self,$start,$end) = @_; | |
| 222 | |
| 223 if( $start > $end ){ | |
| 224 $self->throw("in subqual, start [$start] has to be greater than end [$end]"); | |
| 225 } | |
| 226 | |
| 227 if( $start <= 0 || $end > $self->length ) { | |
| 228 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length.""); | |
| 229 } | |
| 230 | |
| 231 # remove one from start, and then length is end-start | |
| 232 | |
| 233 $start--; | |
| 234 $end--; | |
| 235 my @sub_qual_array = @{$self->{qual}}[$start..$end]; | |
| 236 | |
| 237 # return substr $self->seq(), $start, ($end-$start); | |
| 238 return \@sub_qual_array; | |
| 239 | |
| 240 } | |
| 241 | |
| 242 =head2 display_id() | |
| 243 | |
| 244 Title : display_id() | |
| 245 Usage : $id_string = $obj->display_id(); | |
| 246 Function: returns the display id, aka the common name of the Quality | |
| 247 object. | |
| 248 The semantics of this is that it is the most likely string to be | |
| 249 used as an identifier of the quality sequence, and likely to have | |
| 250 "human" readability. The id is equivalent to the ID field of the | |
| 251 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl | |
| 252 database. In fasta format, the >(\S+) is presumed to be the id, | |
| 253 though some people overload the id to embed other information. | |
| 254 Bioperl does not use any embedded information in the ID field, | |
| 255 and people are encouraged to use other mechanisms (accession | |
| 256 field for example, or extending the sequence object) to solve | |
| 257 this. Notice that $seq->id() maps to this function, mainly for | |
| 258 legacy/convience issues | |
| 259 Returns : A string | |
| 260 Args : None | |
| 261 | |
| 262 =cut | |
| 263 | |
| 264 sub display_id { | |
| 265 my ($obj,$value) = @_; | |
| 266 if( defined $value) { | |
| 267 $obj->{'display_id'} = $value; | |
| 268 } | |
| 269 return $obj->{'display_id'}; | |
| 270 | |
| 271 } | |
| 272 | |
| 273 =head2 accession_number() | |
| 274 | |
| 275 Title : accession_number() | |
| 276 Usage : $unique_biological_key = $obj->accession_number(); | |
| 277 Function: Returns the unique biological id for a sequence, commonly | |
| 278 called the accession_number. For sequences from established | |
| 279 databases, the implementors should try to use the correct | |
| 280 accession number. Notice that primary_id() provides the unique id | |
| 281 for the implemetation, allowing multiple objects to have the same | |
| 282 accession number in a particular implementation. For sequences | |
| 283 with no accession number, this method should return "unknown". | |
| 284 Returns : A string | |
| 285 Args : None | |
| 286 | |
| 287 =cut | |
| 288 | |
| 289 sub accession_number { | |
| 290 my( $obj, $acc ) = @_; | |
| 291 | |
| 292 if (defined $acc) { | |
| 293 $obj->{'accession_number'} = $acc; | |
| 294 } else { | |
| 295 $acc = $obj->{'accession_number'}; | |
| 296 $acc = 'unknown' unless defined $acc; | |
| 297 } | |
| 298 return $acc; | |
| 299 } | |
| 300 | |
| 301 =head2 primary_id() | |
| 302 | |
| 303 Title : primary_id() | |
| 304 Usage : $unique_implementation_key = $obj->primary_id(); | |
| 305 Function: Returns the unique id for this object in this implementation. | |
| 306 This allows implementations to manage their own object ids in a | |
| 307 way the implementaiton can control clients can expect one id to | |
| 308 map to one object. For sequences with no accession number, this | |
| 309 method should return a stringified memory location. | |
| 310 Returns : A string | |
| 311 Args : None | |
| 312 | |
| 313 =cut | |
| 314 | |
| 315 sub primary_id { | |
| 316 my ($obj,$value) = @_; | |
| 317 if( defined $value) { | |
| 318 $obj->{'primary_id'} = $value; | |
| 319 } | |
| 320 return $obj->{'primary_id'}; | |
| 321 | |
| 322 } | |
| 323 | |
| 324 =head2 desc() | |
| 325 | |
| 326 Title : desc() | |
| 327 Usage : $qual->desc($newval); | |
| 328 $description = $qual->desc(); | |
| 329 Function: Get/set description text for a qual object | |
| 330 Example : | |
| 331 Returns : Value of desc | |
| 332 Args : newvalue (optional) | |
| 333 | |
| 334 =cut | |
| 335 | |
| 336 sub desc { | |
| 337 my ($obj,$value) = @_; | |
| 338 if( defined $value) { | |
| 339 $obj->{'desc'} = $value; | |
| 340 } | |
| 341 return $obj->{'desc'}; | |
| 342 | |
| 343 } | |
| 344 | |
| 345 =head2 id() | |
| 346 | |
| 347 Title : id() | |
| 348 Usage : $id = $qual->id(); | |
| 349 Function: Return the ID of the quality. This should normally be (and | |
| 350 actually is in the implementation provided here) just a synonym | |
| 351 for display_id(). | |
| 352 Returns : A string. | |
| 353 Args : None. | |
| 354 | |
| 355 =cut | |
| 356 | |
| 357 sub id { | |
| 358 my ($self,$value) = @_; | |
| 359 if( defined $value ) { | |
| 360 return $self->display_id($value); | |
| 361 } | |
| 362 return $self->display_id(); | |
| 363 } | |
| 364 | |
| 365 =head2 length() | |
| 366 | |
| 367 Title : length() | |
| 368 Usage : $length = $qual->length(); | |
| 369 Function: Return the length of the array holding the quality values. | |
| 370 Under most circumstances, this should match the number of quality | |
| 371 values but no validation is done when the PrimaryQual object is | |
| 372 constructed and non-digits could be put into this array. Is this | |
| 373 a bug? Just enough rope... | |
| 374 Returns : A scalar (the number of elements in the quality array). | |
| 375 Args : None. | |
| 376 | |
| 377 =cut | |
| 378 | |
| 379 sub length { | |
| 380 my $self = shift; | |
| 381 if (ref($self->{qual}) ne "ARRAY") { | |
| 382 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen."); | |
| 383 } | |
| 384 return scalar(@{$self->{qual}}); | |
| 385 } | |
| 386 | |
| 387 =head2 qualat($position) | |
| 388 | |
| 389 Title : qualat($position) | |
| 390 Usage : $quality = $obj->qualat(10); | |
| 391 Function: Return the quality value at the given location, where the | |
| 392 first value is 1 and the number is inclusive, ie 1-2 are the first | |
| 393 two bases of the sequence. Start cannot be larger than end but can | |
| 394 be equal. | |
| 395 Returns : A scalar. | |
| 396 Args : A position. | |
| 397 | |
| 398 =cut | |
| 399 | |
| 400 sub qualat { | |
| 401 my ($self,$val) = @_; | |
| 402 my @qualat = @{$self->subqual($val,$val)}; | |
| 403 if (scalar(@qualat) == 1) { | |
| 404 return $qualat[0]; | |
| 405 } | |
| 406 else { | |
| 407 $self->throw("AAAH! qualat provided more then one quality."); | |
| 408 } | |
| 409 } | |
| 410 | |
| 411 =head2 to_string() | |
| 412 | |
| 413 Title : to_string() | |
| 414 Usage : $quality = $obj->to_string(); | |
| 415 Function: Return a textual representation of what the object contains. | |
| 416 For this module, this function will return: | |
| 417 qual | |
| 418 display_id | |
| 419 accession_number | |
| 420 primary_id | |
| 421 desc | |
| 422 id | |
| 423 length | |
| 424 Returns : A scalar. | |
| 425 Args : None. | |
| 426 | |
| 427 =cut | |
| 428 | |
| 429 sub to_string { | |
| 430 my ($self,$out,$result) = shift; | |
| 431 $out = "qual: ".join(',',@{$self->qual()}); | |
| 432 foreach (qw(display_id accession_number primary_id desc id)) { | |
| 433 $result = $self->$_(); | |
| 434 if (!$result) { $result = "<unset>"; } | |
| 435 $out .= "$_: $result\n"; | |
| 436 } | |
| 437 return $out; | |
| 438 } | |
| 439 | |
| 440 | |
| 441 sub to_string_automatic { | |
| 442 my ($self,$sub_result,$out) = shift; | |
| 443 foreach (sort keys %$self) { | |
| 444 print("Working on $_\n"); | |
| 445 eval { $self->$_(); }; | |
| 446 if ($@) { $sub_result = ref($_); } | |
| 447 elsif (!($sub_result = $self->$_())) { | |
| 448 $sub_result = "<unset>"; | |
| 449 } | |
| 450 if (ref($sub_result) eq "ARRAY") { | |
| 451 print("This thing ($_) is an array!\n"); | |
| 452 $sub_result = join(',',@$sub_result); | |
| 453 } | |
| 454 $out .= "$_: ".$sub_result."\n"; | |
| 455 } | |
| 456 return $out; | |
| 457 } | |
| 458 | |
| 459 1; |
