Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/Seq/PrimaryQual.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: PrimaryQual.pm,v 1.17 2002/10/22 07:38:40 lapp Exp $ | |
2 # | |
3 # bioperl module for Bio::PrimaryQual | |
4 # | |
5 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com> | |
6 # | |
7 # Copyright Chad Matsalla | |
8 # | |
9 # You may distribute this module under the same terms as perl itself | |
10 | |
11 # POD documentation - main docs before the code | |
12 | |
13 =head1 NAME | |
14 | |
15 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object | |
16 | |
17 =head1 SYNOPSIS | |
18 | |
19 use Bio::Seq::PrimaryQual; | |
20 | |
21 # you can use either a space-delimited string for quality | |
22 | |
23 my $string_quals = "10 20 30 40 50 40 30 20 10"; | |
24 my $qualobj = Bio::Seq::PrimaryQual->new | |
25 ( '-qual' => $string_quals, | |
26 '-id' => 'QualityFragment-12', | |
27 '-accession_number' => 'X78121', | |
28 ); | |
29 | |
30 # _or_ you can use an array of quality values | |
31 | |
32 my @q2 = split/ /,$string_quals; | |
33 $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2, | |
34 '-primary_id' => 'chads primary_id', | |
35 '-desc' => 'chads desc', | |
36 '-accession_number' => 'chads accession_number', | |
37 '-id' => 'chads id' | |
38 ); | |
39 | |
40 # to get the quality values out: | |
41 | |
42 my @quals = @{$qualobj->qual()}; | |
43 | |
44 # to give _new_ quality values | |
45 | |
46 my $newqualstring = "50 90 1000 20 12 0 0"; | |
47 $qualobj->qual($newqualstring); | |
48 | |
49 | |
50 =head1 DESCRIPTION | |
51 | |
52 This module provides a mechanism for storing quality | |
53 values. Much more useful as part of | |
54 Bio::Seq::SeqWithQuality where these quality values | |
55 are associated with the sequence information. | |
56 | |
57 =head1 FEEDBACK | |
58 | |
59 =head2 Mailing Lists | |
60 | |
61 User feedback is an integral part of the evolution of this and other | |
62 Bioperl modules. Send your comments and suggestions preferably to one | |
63 of the Bioperl mailing lists. Your participation is much appreciated. | |
64 | |
65 bioperl-l@bioperl.org - General discussion | |
66 http://bio.perl.org/MailList.html - About the mailing lists | |
67 | |
68 =head2 Reporting Bugs | |
69 | |
70 Report bugs to the Bioperl bug tracking system to help us keep track | |
71 the bugs and their resolution. Bug reports can be submitted via email | |
72 or the web: | |
73 | |
74 bioperl-bugs@bio.perl.org | |
75 http://bugzilla.bioperl.org/ | |
76 | |
77 =head1 AUTHOR - Chad Matsalla | |
78 | |
79 Email bioinformatics@dieselwurks.com | |
80 | |
81 =head1 APPENDIX | |
82 | |
83 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
84 | |
85 =cut | |
86 | |
87 | |
88 # Let the code begin... | |
89 | |
90 package Bio::Seq::PrimaryQual; | |
91 use vars qw(@ISA %valid_type); | |
92 use strict; | |
93 | |
94 use Bio::Root::Root; | |
95 use Bio::Seq::QualI; | |
96 | |
97 @ISA = qw(Bio::Root::Root Bio::Seq::QualI); | |
98 | |
99 | |
100 =head2 new() | |
101 | |
102 Title : new() | |
103 Usage : $qual = Bio::Seq::PrimaryQual->new | |
104 ( -qual => '10 20 30 40 50 50 20 10', | |
105 -id => 'human_id', | |
106 -accession_number => 'AL000012', | |
107 ); | |
108 | |
109 Function: Returns a new Bio::Seq::PrimaryQual object from basic | |
110 constructors, being a string _or_ a reference to an array for the | |
111 sequence and strings for id and accession_number. Note that you | |
112 can provide an empty quality string. | |
113 Returns : a new Bio::Seq::PrimaryQual object | |
114 | |
115 =cut | |
116 | |
117 | |
118 | |
119 | |
120 sub new { | |
121 my ($class, @args) = @_; | |
122 my $self = $class->SUPER::new(@args); | |
123 | |
124 # default: turn ON the warnings (duh) | |
125 my($qual,$id,$acc,$pid,$desc,$given_id) = | |
126 $self->_rearrange([qw(QUAL | |
127 DISPLAY_ID | |
128 ACCESSION_NUMBER | |
129 PRIMARY_ID | |
130 DESC | |
131 ID | |
132 )], | |
133 @args); | |
134 if( defined $id && defined $given_id ) { | |
135 if( $id ne $given_id ) { | |
136 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]"); | |
137 } | |
138 } | |
139 if( defined $given_id ) { $id = $given_id; } | |
140 | |
141 # note: the sequence string may be empty | |
142 $self->qual($qual ? $qual : []); | |
143 $id && $self->display_id($id); | |
144 $acc && $self->accession_number($acc); | |
145 $pid && $self->primary_id($pid); | |
146 $desc && $self->desc($desc); | |
147 | |
148 return $self; | |
149 } | |
150 | |
151 =head2 qual() | |
152 | |
153 Title : qual() | |
154 Usage : @quality_values = @{$obj->qual()}; | |
155 Function: Returns the quality as a reference to an array containing the | |
156 quality values. The individual elements of the quality array are | |
157 not validated and can be any numeric value. | |
158 Returns : A reference to an array. | |
159 | |
160 =cut | |
161 | |
162 sub qual { | |
163 my ($self,$value) = @_; | |
164 | |
165 if( ! defined $value || length($value) == 0 ) { | |
166 $self->{'qual'} ||= []; | |
167 } elsif( ref($value) =~ /ARRAY/i ) { | |
168 # if the user passed in a reference to an array | |
169 $self->{'qual'} = $value; | |
170 } elsif(! $self->validate_qual($value)){ | |
171 $self->throw("Attempting to set the quality to [$value] which does not look healthy"); | |
172 } else { | |
173 $self->{'qual'} = [split(/\s+/,$value)]; | |
174 } | |
175 | |
176 return $self->{'qual'}; | |
177 } | |
178 | |
179 =head2 validate_qual($qualstring) | |
180 | |
181 Title : validate_qual($qualstring) | |
182 Usage : print("Valid.") if { &validate_qual($self,$qualities); } | |
183 Function: Make sure that the quality, if it has length > 0, contains at | |
184 least one digit. Note that quality strings are parsed into arrays | |
185 using split/\d+/,$quality_string, so make sure that your quality | |
186 scalar looks like this if you want it to be parsed properly. | |
187 Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>) | |
188 Args : a scalar (any scalar, why PrimarySeq author?) and a scalar | |
189 containing the string to validate. | |
190 | |
191 =cut | |
192 | |
193 sub validate_qual { | |
194 # how do I validate quality values? | |
195 # \d+\s+\d+..., I suppose | |
196 my ($self,$qualstr) = @_; | |
197 # why the CORE?? -- (Because Bio::PrimarySeqI namespace has a | |
198 # length method, you have to qualify | |
199 # which length to use) | |
200 return 0 if (!defined $qualstr || CORE::length($qualstr) <= 0); | |
201 return 1 if( $qualstr =~ /\d/); | |
202 | |
203 return 0; | |
204 } | |
205 | |
206 =head2 subqual($start,$end) | |
207 | |
208 Title : subqual($start,$end) | |
209 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)}; | |
210 Function: returns the quality values from $start to $end, where the | |
211 first value is 1 and the number is inclusive, ie 1-2 are the | |
212 first two bases of the sequence. Start cannot be larger than | |
213 end but can be equal. | |
214 Returns : A reference to an array. | |
215 Args : a start position and an end position | |
216 | |
217 =cut | |
218 | |
219 | |
220 sub subqual { | |
221 my ($self,$start,$end) = @_; | |
222 | |
223 if( $start > $end ){ | |
224 $self->throw("in subqual, start [$start] has to be greater than end [$end]"); | |
225 } | |
226 | |
227 if( $start <= 0 || $end > $self->length ) { | |
228 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length.""); | |
229 } | |
230 | |
231 # remove one from start, and then length is end-start | |
232 | |
233 $start--; | |
234 $end--; | |
235 my @sub_qual_array = @{$self->{qual}}[$start..$end]; | |
236 | |
237 # return substr $self->seq(), $start, ($end-$start); | |
238 return \@sub_qual_array; | |
239 | |
240 } | |
241 | |
242 =head2 display_id() | |
243 | |
244 Title : display_id() | |
245 Usage : $id_string = $obj->display_id(); | |
246 Function: returns the display id, aka the common name of the Quality | |
247 object. | |
248 The semantics of this is that it is the most likely string to be | |
249 used as an identifier of the quality sequence, and likely to have | |
250 "human" readability. The id is equivalent to the ID field of the | |
251 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl | |
252 database. In fasta format, the >(\S+) is presumed to be the id, | |
253 though some people overload the id to embed other information. | |
254 Bioperl does not use any embedded information in the ID field, | |
255 and people are encouraged to use other mechanisms (accession | |
256 field for example, or extending the sequence object) to solve | |
257 this. Notice that $seq->id() maps to this function, mainly for | |
258 legacy/convience issues | |
259 Returns : A string | |
260 Args : None | |
261 | |
262 =cut | |
263 | |
264 sub display_id { | |
265 my ($obj,$value) = @_; | |
266 if( defined $value) { | |
267 $obj->{'display_id'} = $value; | |
268 } | |
269 return $obj->{'display_id'}; | |
270 | |
271 } | |
272 | |
273 =head2 accession_number() | |
274 | |
275 Title : accession_number() | |
276 Usage : $unique_biological_key = $obj->accession_number(); | |
277 Function: Returns the unique biological id for a sequence, commonly | |
278 called the accession_number. For sequences from established | |
279 databases, the implementors should try to use the correct | |
280 accession number. Notice that primary_id() provides the unique id | |
281 for the implemetation, allowing multiple objects to have the same | |
282 accession number in a particular implementation. For sequences | |
283 with no accession number, this method should return "unknown". | |
284 Returns : A string | |
285 Args : None | |
286 | |
287 =cut | |
288 | |
289 sub accession_number { | |
290 my( $obj, $acc ) = @_; | |
291 | |
292 if (defined $acc) { | |
293 $obj->{'accession_number'} = $acc; | |
294 } else { | |
295 $acc = $obj->{'accession_number'}; | |
296 $acc = 'unknown' unless defined $acc; | |
297 } | |
298 return $acc; | |
299 } | |
300 | |
301 =head2 primary_id() | |
302 | |
303 Title : primary_id() | |
304 Usage : $unique_implementation_key = $obj->primary_id(); | |
305 Function: Returns the unique id for this object in this implementation. | |
306 This allows implementations to manage their own object ids in a | |
307 way the implementaiton can control clients can expect one id to | |
308 map to one object. For sequences with no accession number, this | |
309 method should return a stringified memory location. | |
310 Returns : A string | |
311 Args : None | |
312 | |
313 =cut | |
314 | |
315 sub primary_id { | |
316 my ($obj,$value) = @_; | |
317 if( defined $value) { | |
318 $obj->{'primary_id'} = $value; | |
319 } | |
320 return $obj->{'primary_id'}; | |
321 | |
322 } | |
323 | |
324 =head2 desc() | |
325 | |
326 Title : desc() | |
327 Usage : $qual->desc($newval); | |
328 $description = $qual->desc(); | |
329 Function: Get/set description text for a qual object | |
330 Example : | |
331 Returns : Value of desc | |
332 Args : newvalue (optional) | |
333 | |
334 =cut | |
335 | |
336 sub desc { | |
337 my ($obj,$value) = @_; | |
338 if( defined $value) { | |
339 $obj->{'desc'} = $value; | |
340 } | |
341 return $obj->{'desc'}; | |
342 | |
343 } | |
344 | |
345 =head2 id() | |
346 | |
347 Title : id() | |
348 Usage : $id = $qual->id(); | |
349 Function: Return the ID of the quality. This should normally be (and | |
350 actually is in the implementation provided here) just a synonym | |
351 for display_id(). | |
352 Returns : A string. | |
353 Args : None. | |
354 | |
355 =cut | |
356 | |
357 sub id { | |
358 my ($self,$value) = @_; | |
359 if( defined $value ) { | |
360 return $self->display_id($value); | |
361 } | |
362 return $self->display_id(); | |
363 } | |
364 | |
365 =head2 length() | |
366 | |
367 Title : length() | |
368 Usage : $length = $qual->length(); | |
369 Function: Return the length of the array holding the quality values. | |
370 Under most circumstances, this should match the number of quality | |
371 values but no validation is done when the PrimaryQual object is | |
372 constructed and non-digits could be put into this array. Is this | |
373 a bug? Just enough rope... | |
374 Returns : A scalar (the number of elements in the quality array). | |
375 Args : None. | |
376 | |
377 =cut | |
378 | |
379 sub length { | |
380 my $self = shift; | |
381 if (ref($self->{qual}) ne "ARRAY") { | |
382 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen."); | |
383 } | |
384 return scalar(@{$self->{qual}}); | |
385 } | |
386 | |
387 =head2 qualat($position) | |
388 | |
389 Title : qualat($position) | |
390 Usage : $quality = $obj->qualat(10); | |
391 Function: Return the quality value at the given location, where the | |
392 first value is 1 and the number is inclusive, ie 1-2 are the first | |
393 two bases of the sequence. Start cannot be larger than end but can | |
394 be equal. | |
395 Returns : A scalar. | |
396 Args : A position. | |
397 | |
398 =cut | |
399 | |
400 sub qualat { | |
401 my ($self,$val) = @_; | |
402 my @qualat = @{$self->subqual($val,$val)}; | |
403 if (scalar(@qualat) == 1) { | |
404 return $qualat[0]; | |
405 } | |
406 else { | |
407 $self->throw("AAAH! qualat provided more then one quality."); | |
408 } | |
409 } | |
410 | |
411 =head2 to_string() | |
412 | |
413 Title : to_string() | |
414 Usage : $quality = $obj->to_string(); | |
415 Function: Return a textual representation of what the object contains. | |
416 For this module, this function will return: | |
417 qual | |
418 display_id | |
419 accession_number | |
420 primary_id | |
421 desc | |
422 id | |
423 length | |
424 Returns : A scalar. | |
425 Args : None. | |
426 | |
427 =cut | |
428 | |
429 sub to_string { | |
430 my ($self,$out,$result) = shift; | |
431 $out = "qual: ".join(',',@{$self->qual()}); | |
432 foreach (qw(display_id accession_number primary_id desc id)) { | |
433 $result = $self->$_(); | |
434 if (!$result) { $result = "<unset>"; } | |
435 $out .= "$_: $result\n"; | |
436 } | |
437 return $out; | |
438 } | |
439 | |
440 | |
441 sub to_string_automatic { | |
442 my ($self,$sub_result,$out) = shift; | |
443 foreach (sort keys %$self) { | |
444 print("Working on $_\n"); | |
445 eval { $self->$_(); }; | |
446 if ($@) { $sub_result = ref($_); } | |
447 elsif (!($sub_result = $self->$_())) { | |
448 $sub_result = "<unset>"; | |
449 } | |
450 if (ref($sub_result) eq "ARRAY") { | |
451 print("This thing ($_) is an array!\n"); | |
452 $sub_result = join(',',@$sub_result); | |
453 } | |
454 $out .= "$_: ".$sub_result."\n"; | |
455 } | |
456 return $out; | |
457 } | |
458 | |
459 1; |