0
|
1 # $Id: PrimaryQual.pm,v 1.17 2002/10/22 07:38:40 lapp Exp $
|
|
2 #
|
|
3 # bioperl module for Bio::PrimaryQual
|
|
4 #
|
|
5 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
|
|
6 #
|
|
7 # Copyright Chad Matsalla
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 use Bio::Seq::PrimaryQual;
|
|
20
|
|
21 # you can use either a space-delimited string for quality
|
|
22
|
|
23 my $string_quals = "10 20 30 40 50 40 30 20 10";
|
|
24 my $qualobj = Bio::Seq::PrimaryQual->new
|
|
25 ( '-qual' => $string_quals,
|
|
26 '-id' => 'QualityFragment-12',
|
|
27 '-accession_number' => 'X78121',
|
|
28 );
|
|
29
|
|
30 # _or_ you can use an array of quality values
|
|
31
|
|
32 my @q2 = split/ /,$string_quals;
|
|
33 $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2,
|
|
34 '-primary_id' => 'chads primary_id',
|
|
35 '-desc' => 'chads desc',
|
|
36 '-accession_number' => 'chads accession_number',
|
|
37 '-id' => 'chads id'
|
|
38 );
|
|
39
|
|
40 # to get the quality values out:
|
|
41
|
|
42 my @quals = @{$qualobj->qual()};
|
|
43
|
|
44 # to give _new_ quality values
|
|
45
|
|
46 my $newqualstring = "50 90 1000 20 12 0 0";
|
|
47 $qualobj->qual($newqualstring);
|
|
48
|
|
49
|
|
50 =head1 DESCRIPTION
|
|
51
|
|
52 This module provides a mechanism for storing quality
|
|
53 values. Much more useful as part of
|
|
54 Bio::Seq::SeqWithQuality where these quality values
|
|
55 are associated with the sequence information.
|
|
56
|
|
57 =head1 FEEDBACK
|
|
58
|
|
59 =head2 Mailing Lists
|
|
60
|
|
61 User feedback is an integral part of the evolution of this and other
|
|
62 Bioperl modules. Send your comments and suggestions preferably to one
|
|
63 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
64
|
|
65 bioperl-l@bioperl.org - General discussion
|
|
66 http://bio.perl.org/MailList.html - About the mailing lists
|
|
67
|
|
68 =head2 Reporting Bugs
|
|
69
|
|
70 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
71 the bugs and their resolution. Bug reports can be submitted via email
|
|
72 or the web:
|
|
73
|
|
74 bioperl-bugs@bio.perl.org
|
|
75 http://bugzilla.bioperl.org/
|
|
76
|
|
77 =head1 AUTHOR - Chad Matsalla
|
|
78
|
|
79 Email bioinformatics@dieselwurks.com
|
|
80
|
|
81 =head1 APPENDIX
|
|
82
|
|
83 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
84
|
|
85 =cut
|
|
86
|
|
87
|
|
88 # Let the code begin...
|
|
89
|
|
90 package Bio::Seq::PrimaryQual;
|
|
91 use vars qw(@ISA %valid_type);
|
|
92 use strict;
|
|
93
|
|
94 use Bio::Root::Root;
|
|
95 use Bio::Seq::QualI;
|
|
96
|
|
97 @ISA = qw(Bio::Root::Root Bio::Seq::QualI);
|
|
98
|
|
99
|
|
100 =head2 new()
|
|
101
|
|
102 Title : new()
|
|
103 Usage : $qual = Bio::Seq::PrimaryQual->new
|
|
104 ( -qual => '10 20 30 40 50 50 20 10',
|
|
105 -id => 'human_id',
|
|
106 -accession_number => 'AL000012',
|
|
107 );
|
|
108
|
|
109 Function: Returns a new Bio::Seq::PrimaryQual object from basic
|
|
110 constructors, being a string _or_ a reference to an array for the
|
|
111 sequence and strings for id and accession_number. Note that you
|
|
112 can provide an empty quality string.
|
|
113 Returns : a new Bio::Seq::PrimaryQual object
|
|
114
|
|
115 =cut
|
|
116
|
|
117
|
|
118
|
|
119
|
|
120 sub new {
|
|
121 my ($class, @args) = @_;
|
|
122 my $self = $class->SUPER::new(@args);
|
|
123
|
|
124 # default: turn ON the warnings (duh)
|
|
125 my($qual,$id,$acc,$pid,$desc,$given_id) =
|
|
126 $self->_rearrange([qw(QUAL
|
|
127 DISPLAY_ID
|
|
128 ACCESSION_NUMBER
|
|
129 PRIMARY_ID
|
|
130 DESC
|
|
131 ID
|
|
132 )],
|
|
133 @args);
|
|
134 if( defined $id && defined $given_id ) {
|
|
135 if( $id ne $given_id ) {
|
|
136 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
|
|
137 }
|
|
138 }
|
|
139 if( defined $given_id ) { $id = $given_id; }
|
|
140
|
|
141 # note: the sequence string may be empty
|
|
142 $self->qual($qual ? $qual : []);
|
|
143 $id && $self->display_id($id);
|
|
144 $acc && $self->accession_number($acc);
|
|
145 $pid && $self->primary_id($pid);
|
|
146 $desc && $self->desc($desc);
|
|
147
|
|
148 return $self;
|
|
149 }
|
|
150
|
|
151 =head2 qual()
|
|
152
|
|
153 Title : qual()
|
|
154 Usage : @quality_values = @{$obj->qual()};
|
|
155 Function: Returns the quality as a reference to an array containing the
|
|
156 quality values. The individual elements of the quality array are
|
|
157 not validated and can be any numeric value.
|
|
158 Returns : A reference to an array.
|
|
159
|
|
160 =cut
|
|
161
|
|
162 sub qual {
|
|
163 my ($self,$value) = @_;
|
|
164
|
|
165 if( ! defined $value || length($value) == 0 ) {
|
|
166 $self->{'qual'} ||= [];
|
|
167 } elsif( ref($value) =~ /ARRAY/i ) {
|
|
168 # if the user passed in a reference to an array
|
|
169 $self->{'qual'} = $value;
|
|
170 } elsif(! $self->validate_qual($value)){
|
|
171 $self->throw("Attempting to set the quality to [$value] which does not look healthy");
|
|
172 } else {
|
|
173 $self->{'qual'} = [split(/\s+/,$value)];
|
|
174 }
|
|
175
|
|
176 return $self->{'qual'};
|
|
177 }
|
|
178
|
|
179 =head2 validate_qual($qualstring)
|
|
180
|
|
181 Title : validate_qual($qualstring)
|
|
182 Usage : print("Valid.") if { &validate_qual($self,$qualities); }
|
|
183 Function: Make sure that the quality, if it has length > 0, contains at
|
|
184 least one digit. Note that quality strings are parsed into arrays
|
|
185 using split/\d+/,$quality_string, so make sure that your quality
|
|
186 scalar looks like this if you want it to be parsed properly.
|
|
187 Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>)
|
|
188 Args : a scalar (any scalar, why PrimarySeq author?) and a scalar
|
|
189 containing the string to validate.
|
|
190
|
|
191 =cut
|
|
192
|
|
193 sub validate_qual {
|
|
194 # how do I validate quality values?
|
|
195 # \d+\s+\d+..., I suppose
|
|
196 my ($self,$qualstr) = @_;
|
|
197 # why the CORE?? -- (Because Bio::PrimarySeqI namespace has a
|
|
198 # length method, you have to qualify
|
|
199 # which length to use)
|
|
200 return 0 if (!defined $qualstr || CORE::length($qualstr) <= 0);
|
|
201 return 1 if( $qualstr =~ /\d/);
|
|
202
|
|
203 return 0;
|
|
204 }
|
|
205
|
|
206 =head2 subqual($start,$end)
|
|
207
|
|
208 Title : subqual($start,$end)
|
|
209 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
|
|
210 Function: returns the quality values from $start to $end, where the
|
|
211 first value is 1 and the number is inclusive, ie 1-2 are the
|
|
212 first two bases of the sequence. Start cannot be larger than
|
|
213 end but can be equal.
|
|
214 Returns : A reference to an array.
|
|
215 Args : a start position and an end position
|
|
216
|
|
217 =cut
|
|
218
|
|
219
|
|
220 sub subqual {
|
|
221 my ($self,$start,$end) = @_;
|
|
222
|
|
223 if( $start > $end ){
|
|
224 $self->throw("in subqual, start [$start] has to be greater than end [$end]");
|
|
225 }
|
|
226
|
|
227 if( $start <= 0 || $end > $self->length ) {
|
|
228 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
|
|
229 }
|
|
230
|
|
231 # remove one from start, and then length is end-start
|
|
232
|
|
233 $start--;
|
|
234 $end--;
|
|
235 my @sub_qual_array = @{$self->{qual}}[$start..$end];
|
|
236
|
|
237 # return substr $self->seq(), $start, ($end-$start);
|
|
238 return \@sub_qual_array;
|
|
239
|
|
240 }
|
|
241
|
|
242 =head2 display_id()
|
|
243
|
|
244 Title : display_id()
|
|
245 Usage : $id_string = $obj->display_id();
|
|
246 Function: returns the display id, aka the common name of the Quality
|
|
247 object.
|
|
248 The semantics of this is that it is the most likely string to be
|
|
249 used as an identifier of the quality sequence, and likely to have
|
|
250 "human" readability. The id is equivalent to the ID field of the
|
|
251 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
|
|
252 database. In fasta format, the >(\S+) is presumed to be the id,
|
|
253 though some people overload the id to embed other information.
|
|
254 Bioperl does not use any embedded information in the ID field,
|
|
255 and people are encouraged to use other mechanisms (accession
|
|
256 field for example, or extending the sequence object) to solve
|
|
257 this. Notice that $seq->id() maps to this function, mainly for
|
|
258 legacy/convience issues
|
|
259 Returns : A string
|
|
260 Args : None
|
|
261
|
|
262 =cut
|
|
263
|
|
264 sub display_id {
|
|
265 my ($obj,$value) = @_;
|
|
266 if( defined $value) {
|
|
267 $obj->{'display_id'} = $value;
|
|
268 }
|
|
269 return $obj->{'display_id'};
|
|
270
|
|
271 }
|
|
272
|
|
273 =head2 accession_number()
|
|
274
|
|
275 Title : accession_number()
|
|
276 Usage : $unique_biological_key = $obj->accession_number();
|
|
277 Function: Returns the unique biological id for a sequence, commonly
|
|
278 called the accession_number. For sequences from established
|
|
279 databases, the implementors should try to use the correct
|
|
280 accession number. Notice that primary_id() provides the unique id
|
|
281 for the implemetation, allowing multiple objects to have the same
|
|
282 accession number in a particular implementation. For sequences
|
|
283 with no accession number, this method should return "unknown".
|
|
284 Returns : A string
|
|
285 Args : None
|
|
286
|
|
287 =cut
|
|
288
|
|
289 sub accession_number {
|
|
290 my( $obj, $acc ) = @_;
|
|
291
|
|
292 if (defined $acc) {
|
|
293 $obj->{'accession_number'} = $acc;
|
|
294 } else {
|
|
295 $acc = $obj->{'accession_number'};
|
|
296 $acc = 'unknown' unless defined $acc;
|
|
297 }
|
|
298 return $acc;
|
|
299 }
|
|
300
|
|
301 =head2 primary_id()
|
|
302
|
|
303 Title : primary_id()
|
|
304 Usage : $unique_implementation_key = $obj->primary_id();
|
|
305 Function: Returns the unique id for this object in this implementation.
|
|
306 This allows implementations to manage their own object ids in a
|
|
307 way the implementaiton can control clients can expect one id to
|
|
308 map to one object. For sequences with no accession number, this
|
|
309 method should return a stringified memory location.
|
|
310 Returns : A string
|
|
311 Args : None
|
|
312
|
|
313 =cut
|
|
314
|
|
315 sub primary_id {
|
|
316 my ($obj,$value) = @_;
|
|
317 if( defined $value) {
|
|
318 $obj->{'primary_id'} = $value;
|
|
319 }
|
|
320 return $obj->{'primary_id'};
|
|
321
|
|
322 }
|
|
323
|
|
324 =head2 desc()
|
|
325
|
|
326 Title : desc()
|
|
327 Usage : $qual->desc($newval);
|
|
328 $description = $qual->desc();
|
|
329 Function: Get/set description text for a qual object
|
|
330 Example :
|
|
331 Returns : Value of desc
|
|
332 Args : newvalue (optional)
|
|
333
|
|
334 =cut
|
|
335
|
|
336 sub desc {
|
|
337 my ($obj,$value) = @_;
|
|
338 if( defined $value) {
|
|
339 $obj->{'desc'} = $value;
|
|
340 }
|
|
341 return $obj->{'desc'};
|
|
342
|
|
343 }
|
|
344
|
|
345 =head2 id()
|
|
346
|
|
347 Title : id()
|
|
348 Usage : $id = $qual->id();
|
|
349 Function: Return the ID of the quality. This should normally be (and
|
|
350 actually is in the implementation provided here) just a synonym
|
|
351 for display_id().
|
|
352 Returns : A string.
|
|
353 Args : None.
|
|
354
|
|
355 =cut
|
|
356
|
|
357 sub id {
|
|
358 my ($self,$value) = @_;
|
|
359 if( defined $value ) {
|
|
360 return $self->display_id($value);
|
|
361 }
|
|
362 return $self->display_id();
|
|
363 }
|
|
364
|
|
365 =head2 length()
|
|
366
|
|
367 Title : length()
|
|
368 Usage : $length = $qual->length();
|
|
369 Function: Return the length of the array holding the quality values.
|
|
370 Under most circumstances, this should match the number of quality
|
|
371 values but no validation is done when the PrimaryQual object is
|
|
372 constructed and non-digits could be put into this array. Is this
|
|
373 a bug? Just enough rope...
|
|
374 Returns : A scalar (the number of elements in the quality array).
|
|
375 Args : None.
|
|
376
|
|
377 =cut
|
|
378
|
|
379 sub length {
|
|
380 my $self = shift;
|
|
381 if (ref($self->{qual}) ne "ARRAY") {
|
|
382 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen.");
|
|
383 }
|
|
384 return scalar(@{$self->{qual}});
|
|
385 }
|
|
386
|
|
387 =head2 qualat($position)
|
|
388
|
|
389 Title : qualat($position)
|
|
390 Usage : $quality = $obj->qualat(10);
|
|
391 Function: Return the quality value at the given location, where the
|
|
392 first value is 1 and the number is inclusive, ie 1-2 are the first
|
|
393 two bases of the sequence. Start cannot be larger than end but can
|
|
394 be equal.
|
|
395 Returns : A scalar.
|
|
396 Args : A position.
|
|
397
|
|
398 =cut
|
|
399
|
|
400 sub qualat {
|
|
401 my ($self,$val) = @_;
|
|
402 my @qualat = @{$self->subqual($val,$val)};
|
|
403 if (scalar(@qualat) == 1) {
|
|
404 return $qualat[0];
|
|
405 }
|
|
406 else {
|
|
407 $self->throw("AAAH! qualat provided more then one quality.");
|
|
408 }
|
|
409 }
|
|
410
|
|
411 =head2 to_string()
|
|
412
|
|
413 Title : to_string()
|
|
414 Usage : $quality = $obj->to_string();
|
|
415 Function: Return a textual representation of what the object contains.
|
|
416 For this module, this function will return:
|
|
417 qual
|
|
418 display_id
|
|
419 accession_number
|
|
420 primary_id
|
|
421 desc
|
|
422 id
|
|
423 length
|
|
424 Returns : A scalar.
|
|
425 Args : None.
|
|
426
|
|
427 =cut
|
|
428
|
|
429 sub to_string {
|
|
430 my ($self,$out,$result) = shift;
|
|
431 $out = "qual: ".join(',',@{$self->qual()});
|
|
432 foreach (qw(display_id accession_number primary_id desc id)) {
|
|
433 $result = $self->$_();
|
|
434 if (!$result) { $result = "<unset>"; }
|
|
435 $out .= "$_: $result\n";
|
|
436 }
|
|
437 return $out;
|
|
438 }
|
|
439
|
|
440
|
|
441 sub to_string_automatic {
|
|
442 my ($self,$sub_result,$out) = shift;
|
|
443 foreach (sort keys %$self) {
|
|
444 print("Working on $_\n");
|
|
445 eval { $self->$_(); };
|
|
446 if ($@) { $sub_result = ref($_); }
|
|
447 elsif (!($sub_result = $self->$_())) {
|
|
448 $sub_result = "<unset>";
|
|
449 }
|
|
450 if (ref($sub_result) eq "ARRAY") {
|
|
451 print("This thing ($_) is an array!\n");
|
|
452 $sub_result = join(',',@$sub_result);
|
|
453 }
|
|
454 $out .= "$_: ".$sub_result."\n";
|
|
455 }
|
|
456 return $out;
|
|
457 }
|
|
458
|
|
459 1;
|