0
|
1 # $Id: QualI.pm,v 1.4 2002/10/22 07:38:40 lapp Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::Seq::QualI
|
|
4 #
|
|
5 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
|
|
6 #
|
|
7 # Copyright Chad Matsalla
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::Seq::QualI - Interface definition for a Bio::Seq::Qual
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 # get a Bio::Seq::Qual compliant object somehow
|
|
20
|
|
21 # to test this is a quality object
|
|
22
|
|
23 $obj->isa("Bio::Seq::QualI") || $obj->throw("$obj does not implement the Bio::Seq::QualI interface");
|
|
24
|
|
25 # accessors
|
|
26
|
|
27 $string = $obj->qual();
|
|
28 $substring = $obj->subqual(12,50);
|
|
29 $display = $obj->display_id(); # for human display
|
|
30 $id = $obj->primary_id(); # unique id for this object, implementation defined
|
|
31 $unique_key= $obj->accession_number();
|
|
32 # unique biological id
|
|
33
|
|
34
|
|
35
|
|
36 =head1 DESCRIPTION
|
|
37
|
|
38 This object defines an abstract interface to basic quality
|
|
39 information. PrimaryQual is an object just for the quality and its
|
|
40 name(s), nothing more. There is a pure perl implementation of this in
|
|
41 Bio::Seq::PrimaryQual. If you just want to use Bio::Seq::PrimaryQual
|
|
42 objects, then please read that module first. This module defines the
|
|
43 interface, and is of more interest to people who want to wrap their own
|
|
44 Perl Objects/RDBs/FileSystems etc in a way that they "are" bioperl quality
|
|
45 objects, even though it is not using Perl to store the sequence etc.
|
|
46
|
|
47 This interface defines what bioperl considers necessary to "be" a
|
|
48 sequence of qualities, without providing an implementation of this. (An
|
|
49 implementation is provided in Bio::Seq::PrimaryQual). If you want to
|
|
50 provide a Bio::Seq::PrimaryQual 'compliant' object which in fact wraps
|
|
51 another object/database/out-of-perl experience, then this is the correct
|
|
52 thing to wrap, generally by providing a wrapper class which would inherit
|
|
53 from your object and this Bio::Seq::QualI interface. The wrapper
|
|
54 class then would have methods listed in the "Implementation Specific
|
|
55 Functions" which would provide these methods for your object.
|
|
56
|
|
57
|
|
58 =head1 FEEDBACK
|
|
59
|
|
60 =head2 Mailing Lists
|
|
61
|
|
62 User feedback is an integral part of the evolution of this and other
|
|
63 Bioperl modules. Send your comments and suggestions preferably to one
|
|
64 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
65
|
|
66 bioperl-l@bioperl.org - General discussion
|
|
67 http://bio.perl.org/MailList.html - About the mailing lists
|
|
68
|
|
69 =head2 Reporting Bugs
|
|
70
|
|
71 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
72 of the bugs and their resolution. Bug reports can be submitted via email
|
|
73 or the web:
|
|
74
|
|
75 bioperl-bugs@bio.perl.org
|
|
76 http://bugzilla.bioperl.org/
|
|
77
|
|
78 =head1 AUTHOR - Chad Matsalla
|
|
79
|
|
80 This module is heavily based on Bio::Seq::PrimarySeq and is modeled after
|
|
81 or outright copies sections of it. Thanks Ewan!
|
|
82
|
|
83 Email bioinformatics@dieselwurks.com
|
|
84
|
|
85 =head1 APPENDIX
|
|
86
|
|
87 The rest of the documentation details each of the object methods.
|
|
88 Internal methods are usually preceded with a _
|
|
89
|
|
90 =cut
|
|
91
|
|
92
|
|
93 # Let the code begin...
|
|
94
|
|
95
|
|
96 package Bio::Seq::QualI;
|
|
97 use vars qw(@ISA);
|
|
98 use strict;
|
|
99 use Carp;
|
|
100
|
|
101 =head1 Implementation Specific Functions
|
|
102
|
|
103 These functions are the ones that a specific implementation must
|
|
104 define.
|
|
105
|
|
106 =head2 qual()
|
|
107
|
|
108 Title : qual()
|
|
109 Usage : @quality_values = @{$obj->qual()};
|
|
110 Function: Returns the quality as a reference to an array containing the
|
|
111 quality values. The individual elements of the quality array are
|
|
112 not validated and can be any numeric value.
|
|
113 Returns : A reference to an array.
|
|
114 Status :
|
|
115
|
|
116 =cut
|
|
117
|
|
# Abstract accessor for the quality array reference. The interface itself
# holds no data, so reaching this definition means the implementing class
# forgot to override qual(). The error is reported through the object's own
# throw() when it has one, otherwise through Carp::confess.
sub qual {
    my $self = $_[0];
    my $message = "Bio::Seq::QualI definition of qual - implementing class did not provide this method";

    # Prefer the object's own exception mechanism when available.
    return $self->throw($message) if $self->can('throw');

    confess($message);
}
|
|
126
|
|
127 =head2 subqual($start,$end)
|
|
128
|
|
129 Title : subqual($start,$end)
|
|
130 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
|
|
131 Function: returns the quality values from $start to $end, where the
|
|
132 first value is 1 and the number is inclusive, ie 1-2 are the first
|
|
133 two bases of the sequence. Start cannot be larger than end but can
|
|
134 be equal.
|
|
135 Returns : A reference to an array.
|
|
136 Args : a start position and an end position
|
|
137
|
|
138
|
|
139 =cut
|
|
140
|
|
# Abstract accessor for a range of quality values. Any start/end arguments
# are ignored here: this definition only reports that the implementing
# class did not override subqual(), via the object's throw() when present
# and Carp::confess otherwise.
sub subqual {
    my $self = $_[0];
    my $message = "Bio::Seq::QualI definition of subqual - implementing class did not provide this method";

    # Prefer the object's own exception mechanism when available.
    return $self->throw($message) if $self->can('throw');

    confess($message);
}
|
|
151
|
|
152 =head2 display_id()
|
|
153
|
|
154 Title : display_id()
|
|
155 Usage : $id_string = $obj->display_id() _or_
|
|
156 $id_string = $obj->display_id($new_display_id);
|
|
157 Function: Returns the display id, aka the common name of the Quality
|
|
158 object.
|
|
159 The semantics of this is that it is the most likely string to be
|
|
160 used as an identifier of the quality sequence, and likely to have
|
|
161 "human" readability. The id is equivalent to the ID field of the
|
|
162 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
|
|
163 database. In fasta format, the >(\S+) is presumed to be the id,
|
|
164 though some people overload the id to embed other information.
|
|
165 Bioperl does not use any embedded information in the ID field,
|
|
166 and people are encouraged to use other mechanisms (accession field
|
|
167 for example, or extending the sequence object) to solve this.
|
|
168 Notice that $seq->id() maps to this function, mainly for
|
|
169 legacy/convenience issues
|
|
170 Returns : A string
|
|
171 Args : If an arg is provided, it will replace the existing display_id
|
|
172 in the object.
|
|
173
|
|
174
|
|
175 =cut
|
|
176
|
|
# Abstract get/set accessor for the display id. This interface definition
# never returns an id: it reports that the implementing class did not
# override display_id(), using the object's throw() when it has one and
# Carp::confess otherwise.
#
# The message names display_id explicitly (it previously said "id"), so the
# error points at the method that is actually missing.
sub display_id {
    my ($self) = @_;
    my $message = "Bio::Seq::QualI definition of display_id - implementing class did not provide this method";

    if ( $self->can('throw') ) {
        $self->throw($message);
    }
    else {
        confess($message);
    }
}
|
|
187
|
|
188
|
|
189 =head2 accession_number()
|
|
190
|
|
191 Title : accession_number()
|
|
192 Usage : $unique_biological_key = $obj->accession_number(); _or_
|
|
193 $unique_biological_key = $obj->accession_number($new_acc_num);
|
|
194 Function: Returns the unique biological id for a sequence, commonly
|
|
195 called the accession_number. For sequences from established
|
|
196 databases, the implementors should try to use the correct
|
|
197 accession number. Notice that primary_id() provides the unique id
|
|
198 for the implementation, allowing multiple objects to have the same
|
|
199 accession number in a particular implementation. For sequences
|
|
200 with no accession number, this method should return "unknown".
|
|
201 Returns : A string.
|
|
202 Args : If an arg is provided, it will replace the existing
|
|
203 accession_number in the object.
|
|
204
|
|
205 =cut
|
|
206
|
|
# Abstract get/set accessor for the accession number. This interface
# definition only reports that the implementing class did not override
# accession_number(), using the object's throw() when it has one and
# Carp::confess otherwise. Any arguments are ignored.
#
# The message previously said "definition of seq" (copied from another
# method); it now names accession_number so the error is not misleading.
sub accession_number {
    my ($self) = @_;
    my $message = "Bio::Seq::QualI definition of accession_number - implementing class did not provide this method";

    if ( $self->can('throw') ) {
        $self->throw($message);
    }
    else {
        confess($message);
    }
}
|
|
217
|
|
218
|
|
219
|
|
220 =head2 primary_id()
|
|
221
|
|
222 Title : primary_id()
|
|
223 Usage : $unique_implementation_key = $obj->primary_id(); _or_
|
|
224 $unique_implementation_key = $obj->primary_id($new_prim_id);
|
|
225 Function: Returns the unique id for this object in this implementation.
|
|
226 This allows implementations to manage their own object ids in a
|
|
227 way the implementation can control; clients can expect one id to
|
|
228 map to one object. For sequences with no accession number, this
|
|
229 method should return a stringified memory location.
|
|
230 Returns : A string
|
|
231 Args : If an arg is provided, it will replace the existing
|
|
232 primary_id in the object.
|
|
233
|
|
234 =cut
|
|
235
|
|
# Abstract get/set accessor for the implementation-specific primary id.
# This interface definition only reports that the implementing class did
# not override primary_id(), using the object's throw() when it has one and
# Carp::confess otherwise. Any arguments are ignored.
#
# The message previously said "definition of qual" (copied from another
# method); it now names primary_id so the error is not misleading.
sub primary_id {
    my ($self) = @_;
    my $message = "Bio::Seq::QualI definition of primary_id - implementing class did not provide this method";

    if ( $self->can('throw') ) {
        $self->throw($message);
    }
    else {
        confess($message);
    }
}
|
|
246
|
|
247
|
|
248 =head2 can_call_new()
|
|
249
|
|
250 Title : can_call_new()
|
|
251 Usage : if( $obj->can_call_new ) {
|
|
252 $newobj = $obj->new( %param );
|
|
253 }
|
|
254 Function: can_call_new returns 1 or 0 depending on whether an
|
|
255 implementation allows new constructor to be called. If a new
|
|
256 constructor is allowed, then it should take the following hashed
|
|
257 constructor list.
|
|
258 $myobject->new( -qual => $quality_as_string,
|
|
259 -display_id => $id,
|
|
260 -accession_number => $accession,
|
|
261 );
|
|
262 Example :
|
|
263 Returns : 1 or 0
|
|
264 Args :
|
|
265
|
|
266
|
|
267 =cut
|
|
268
|
|
# Reports whether new() may be called on the implementing class.
# The interface default is 0; implementations that support the hashed
# constructor override this to return 1.
sub can_call_new {
    my $constructor_allowed = 0;    # interface default
    return $constructor_allowed;
}
|
|
274
|
|
275 =head2 qualat($position)
|
|
276
|
|
277 Title : qualat($position)
|
|
278 Usage : $quality = $obj->qualat(10);
|
|
279 Function: Return the quality value at the given position, where the
|
|
280 first value is at position 1, ie qualat(1) is the quality of the
|
|
281 first base of the sequence. This interface version only warns and
|
|
282 returns an empty string.
|
|
283 Returns : A scalar.
|
|
284 Args : A position.
|
|
285
|
|
286 =cut
|
|
287
|
|
# Default qualat(): the interface cannot look up a quality value, so it
# issues a warning (through the object's warn() method when it has one,
# otherwise the builtin warn) and returns an empty string. The position
# argument is accepted but not used.
sub qualat {
    my ($self, $position) = @_;
    my $notice = "Bio::Seq::QualI definition of qualat - implementing class did not provide this method";

    my $has_own_warn = $self->can('warn');
    $self->warn($notice) if $has_own_warn;
    warn($notice) unless $has_own_warn;

    return '';
}
|
|
297
|
|
298 =head1 Optional Implementation Functions
|
|
299
|
|
300 The following functions rely on the above functions. An implementing
|
|
301 class does not need to provide these functions, as they will be
|
|
302 provided by this class, but is free to override these functions.
|
|
303
|
|
304 Both revcom() and trunc() (but not translate()) create new quality
|
|
305 objects. They will call new() on the class of the sequence object
|
|
306 instance passed as argument, unless can_call_new() returns FALSE. In
|
|
307 the latter case a Bio::Seq::PrimaryQual object will be created. Implementors
|
|
308 which really want to control how objects are created (eg, for object
|
|
309 persistence over a database, or objects in a CORBA framework), they
|
|
310 are encouraged to override these methods
|
|
311
|
|
312 =head2 revcom
|
|
313
|
|
314 Title : revcom
|
|
315 Usage : $reversed_qual_obj = $qual->revcom();
|
|
316 Function: Produces a new Bio::Seq::QualI implementing object which
|
|
317 is reversed from the original quality array.
|
|
318 The id is the same id as the original sequence, and the accession number
|
|
319 is also identical. If someone wants to track that this sequence has
|
|
320 been reversed, it needs to define its own extensions
|
|
321
|
|
322 To do an inplace edit of an object you can go:
|
|
323
|
|
324 $qual = $qual->revcom();
|
|
325
|
|
326 This of course, causes Perl to handle the garbage collection of the old
|
|
327 object, but it is roughly speaking as efficient as an inplace edit.
|
|
328 Returns : A new (fresh) Bio::Seq::QualI implementing object
|
|
329 Args : none
|
|
330
|
|
331 =cut
|
|
332
|
|
# Build a new quality object whose quality values are in reverse order.
#
# Args    : none
# Returns : a new object, constructed with -qual (array ref of the reversed
#           values), -display_id, -accession_number and -desc copied from
#           this object. The class is ref($self) when can_call_new() is
#           true, otherwise Bio::Seq::PrimaryQual.
#
# Changes from the previous version:
#   * reads the values through qual(), the accessor this interface defines
#     (it used to call seq(), which is not part of Bio::Seq::QualI);
#   * loads Bio::Seq::PrimaryQual on demand before using it as the fallback
#     class, instead of assuming the caller already loaded it;
#   * forces scalar context on the accessors so one that returns an empty
#     list cannot shift the named constructor arguments.
sub revcom {
    my ($self) = @_;

    # reverse() works on a copy, so the object's own array is left untouched.
    my @reversed_qualities = reverse @{ $self->qual() };

    my $seqclass;
    if ( $self->can_call_new() ) {
        $seqclass = ref($self);
    }
    else {
        $seqclass = 'Bio::Seq::PrimaryQual';
        # Only hit the filesystem when the class is not already usable.
        require Bio::Seq::PrimaryQual unless $seqclass->can('new');
    }

    my $out = $seqclass->new(
        '-qual'             => \@reversed_qualities,
        '-display_id'       => scalar( $self->display_id ),
        '-accession_number' => scalar( $self->accession_number ),
        '-desc'             => scalar( $self->desc() ),
    );
    return $out;
}
|
|
354
|
|
355 =head2 trunc()
|
|
356
|
|
357 Title : trunc
|
|
358 Usage : $subseq = $myseq->trunc(10,100);
|
|
359 Function: Provides a truncation of a sequence,
|
|
360 Returns : a fresh Bio::Seq::QualI implementing object
|
|
361 Args : Two integers denoting first and last base of the sub-sequence.
|
|
362
|
|
363
|
|
364 =cut
|
|
365
|
|
# Build a new quality object holding only the values from $start to $end.
#
# Args    : $start, $end - passed unchanged to subqual().
# Returns : a new object constructed with -qual (whatever subqual()
#           returned), -display_id, -accession_number and -desc copied from
#           this object. The class is ref($self) when can_call_new() is
#           true, otherwise Bio::Seq::PrimaryQual.
# Throws  : via the object's throw() when it has one, otherwise
#           Carp::confess, when $end is false or $end < $start.
#
# Changes from the previous version:
#   * loads Bio::Seq::PrimaryQual on demand before using it as the fallback
#     class, instead of assuming the caller already loaded it;
#   * forces scalar context on the accessors so one that returns an empty
#     list cannot shift the named constructor arguments.
sub trunc {
    my ($self, $start, $end) = @_;

    if ( !$end ) {
        if ( $self->can('throw') ) {
            $self->throw("trunc start,end");
        }
        else {
            confess("[$self] trunc start,end");
        }
    }

    if ( $end < $start ) {
        if ( $self->can('throw') ) {
            $self->throw("$end is smaller than $start. if you want to truncated and reverse complement, you must call trunc followed by revcom. Sorry.");
        }
        else {
            confess("[$self] $end is smaller than $start. If you want to truncated and reverse complement, you must call trunc followed by revcom. Sorry.");
        }
    }

    my $r_qual = $self->subqual($start, $end);

    my $seqclass;
    if ( $self->can_call_new() ) {
        $seqclass = ref($self);
    }
    else {
        $seqclass = 'Bio::Seq::PrimaryQual';
        # Only hit the filesystem when the class is not already usable.
        require Bio::Seq::PrimaryQual unless $seqclass->can('new');
    }

    my $out = $seqclass->new(
        '-qual'             => $r_qual,
        '-display_id'       => scalar( $self->display_id ),
        '-accession_number' => scalar( $self->accession_number ),
        '-desc'             => scalar( $self->desc() ),
    );
    return $out;
}
|
|
402
|
|
403
|
|
404 =head2 translate()
|
|
405
|
|
406 Title : translate()
|
|
407 Usage : $protein_seq_obj = $dna_seq_obj->translate
|
|
408 #if full CDS expected:
|
|
409 $protein_seq_obj = $cds_seq_obj->translate(undef,undef,undef,undef,1);
|
|
410 Function: Completely useless in this interface.
|
|
411 Returns : 0 (always).
|
|
412 Args : Nothing.
|
|
413
|
|
414 =cut
|
|
415
|
|
416
|
|
# Translation has no meaning for quality values; this method exists only
# for interface parity and always returns 0, whatever arguments it is given.
sub translate {
    my $no_translation = 0;
    return $no_translation;
}
|
|
420
|
|
421
|
|
422 =head2 id()
|
|
423
|
|
424 Title : id()
|
|
425 Usage : $id = $qual->id()
|
|
426 Function: ID of the quality. This should normally be (and actually is in
|
|
427 the implementation provided here) just a synonym for display_id().
|
|
428 Example :
|
|
429 Returns : A string.
|
|
430 Args :
|
|
431
|
|
432
|
|
433 =cut
|
|
434
|
|
# Synonym for display_id(): delegates to it with no arguments and returns
# whatever it returns.
sub id {
    my $self = $_[0];
    return $self->display_id();
}
|
|
439
|
|
440 =head2 length()
|
|
441
|
|
442 Title : length()
|
|
443 Usage : $length = $qual->length();
|
|
444 Function: Return the length of the array holding the quality values.
|
|
445 Under most circumstances, this should match the number of quality
|
|
446 values but no validation is done when the PrimaryQual object is
|
|
447 constructed and non-digits could be put into this array. Is this a
|
|
448 bug? Just enough rope...
|
|
449 Returns : A scalar (the number of elements in the quality array).
|
|
450 Args : None.
|
|
451
|
|
452 =cut
|
|
453
|
|
# Abstract length accessor. The interface holds no quality array, so this
# definition only reports that the implementing class did not override
# length(), via the object's throw() when present and Carp::confess
# otherwise.
sub length {
    my $self = $_[0];
    my $message = "Bio::Seq::QualI definition of length - implementing class did not provide this method";

    # Prefer the object's own exception mechanism when available.
    return $self->throw($message) if $self->can('throw');

    confess($message);
}
|
|
462
|
|
463
|
|
464 =head2 desc()
|
|
465
|
|
466 Title : desc()
|
|
467 Usage : $qual->desc($newval);
|
|
468 $description = $seq->desc();
|
|
469 Function: Get/set description text for a qual object
|
|
470 Example :
|
|
471 Returns : value of desc
|
|
472 Args : newvalue (optional)
|
|
473
|
|
474 =cut
|
|
475
|
|
# Default desc(): the interface stores no description, so it issues a
# warning (through the object's warn() method when it has one, otherwise
# the builtin warn) and returns an empty string. A new value passed as
# argument is accepted but not stored.
sub desc {
    my ($self, $new_description) = @_;
    my $notice = "Bio::Seq::QualI definition of desc - implementing class did not provide this method";

    my $has_own_warn = $self->can('warn');
    $self->warn($notice) if $has_own_warn;
    warn($notice) unless $has_own_warn;

    return '';
}
|
|
485
|
|
486 # These methods are here for backward compatibility with the old, 0.5
|
|
487 # Seq objects. They all throw warnings that someone is using a
|
|
488 # deprecated method, and may eventually be removed completely from
|
|
489 # this object. However, they are important to ease the transition from
|
|
490 # the old system.
|
|
491
|
|
492 =head1 Private functions
|
|
493
|
|
494 These are some private functions for the Bio::Seq::QualI interface. You do not
|
|
495 need to implement these functions
|
|
496
|
|
497 =head2 _attempt_to_load_Seq
|
|
498
|
|
499 Title : _attempt_to_load_Seq
|
|
500 Usage :
|
|
501 Function:
|
|
502 Example :
|
|
503 Returns :
|
|
504 Args :
|
|
505
|
|
506
|
|
507 =cut
|
|
508
|
|
# Make sure Bio::Seq::PrimaryQual is available, loading it on demand.
#
# Returns : 1 when the class is usable.
# Throws  : via the object's throw() when it has one, otherwise
#           Carp::confess, when the module cannot be loaded. If throw()
#           returns instead of dying, 0 is returned.
#
# Changes from the previous version:
#   * the "already loaded" test used $main::{'Bio::Seq::PrimaryQual'}, a
#     lookup that never matches a nested package; it now asks the class
#     whether it can new();
#   * both error branches carry the same message, naming the module that
#     actually failed (Bio::Seq::PrimaryQual) and this interface
#     (Bio::Seq::QualI);
#   * the outcome of the load is taken from eval's return value, and $@ is
#     copied immediately so later code cannot clobber it.
sub _attempt_to_load_Seq {
    my ($self) = @_;
    my $class = 'Bio::Seq::PrimaryQual';

    # Already usable, whether loaded from disk or defined inline.
    return 1 if $class->can('new');

    return 1 if eval { require Bio::Seq::PrimaryQual; 1 };

    my $load_error = $@;
    my $message = "Bio::Seq::PrimaryQual could not be loaded for $self\nThis indicates that you are using Bio::Seq::QualI without Bio::Seq::PrimaryQual loaded and without providing a complete solution\nThe most likely problem is that there has been a misconfiguration of the bioperl environment\nActual exception\n\n$load_error\n";

    if ( $self->can('throw') ) {
        $self->throw($message);
    }
    else {
        confess($message);
    }

    # Only reached when the object's throw() does not die.
    return 0;
}
|
|
530
|
|
531
|
|
532 =head2 qualtype()
|
|
533
|
|
534 Title : qualtype()
|
|
535 Usage : if( $obj->qualtype eq 'phd' ) { /Do Something/ }
|
|
536 Function: At this time, this function is not used for
|
|
537 Bio::Seq::PrimaryQual objects. In fact, now it is a month later and
|
|
538 I just completed the Bio::Seq::SeqWithQuality object and this is
|
|
539 definitely deprecated.
|
|
540 Returns : Nothing. (not implemented)
|
|
541 Args : none
|
|
542 Status : Virtual
|
|
543
|
|
544
|
|
545 =cut
|
|
546
|
|
# Deprecated, unimplemented accessor. Always raises an error saying that
# qualtype is not used with quality objects, through the object's throw()
# when it has one and Carp::confess otherwise. Any arguments are ignored.
#
# The throw() branch previously misspelled the method as "qualtypetype";
# both branches now carry the same message.
sub qualtype {
    my ($self) = @_;
    my $message = "qualtype is not used with quality objects.";

    if ( $self->can('throw') ) {
        $self->throw($message);
    }
    else {
        confess($message);
    }
}
|
|
559
|
|
560
|
|
561
|
|
562
|
|
563 1;
|