comparison variant_effect_predictor/Bio/Seq/PrimaryQual.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: PrimaryQual.pm,v 1.17 2002/10/22 07:38:40 lapp Exp $
2 #
3 # bioperl module for Bio::PrimaryQual
4 #
5 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com>
6 #
7 # Copyright Chad Matsalla
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
16
17 =head1 SYNOPSIS
18
19 use Bio::Seq::PrimaryQual;
20
21 # you can use either a space-delimited string for quality
22
23 my $string_quals = "10 20 30 40 50 40 30 20 10";
24 my $qualobj = Bio::Seq::PrimaryQual->new
25 ( '-qual' => $string_quals,
26 '-id' => 'QualityFragment-12',
27 '-accession_number' => 'X78121',
28 );
29
30 # _or_ you can use an array of quality values
31
32 my @q2 = split/ /,$string_quals;
33 $qualobj = Bio::Seq::PrimaryQual->new( '-qual' => \@q2,
34 '-primary_id' => 'chads primary_id',
35 '-desc' => 'chads desc',
36 '-accession_number' => 'chads accession_number',
37 '-id' => 'chads id'
38 );
39
40 # to get the quality values out:
41
42 my @quals = @{$qualobj->qual()};
43
44 # to give _new_ quality values
45
46 my $newqualstring = "50 90 1000 20 12 0 0";
47 $qualobj->qual($newqualstring);
48
49
50 =head1 DESCRIPTION
51
52 This module provides a mechanism for storing quality
53 values. Much more useful as part of
54 Bio::Seq::SeqWithQuality where these quality values
55 are associated with the sequence information.
56
57 =head1 FEEDBACK
58
59 =head2 Mailing Lists
60
61 User feedback is an integral part of the evolution of this and other
62 Bioperl modules. Send your comments and suggestions preferably to one
63 of the Bioperl mailing lists. Your participation is much appreciated.
64
65 bioperl-l@bioperl.org - General discussion
66 http://bio.perl.org/MailList.html - About the mailing lists
67
68 =head2 Reporting Bugs
69
70 Report bugs to the Bioperl bug tracking system to help us keep track
71 the bugs and their resolution. Bug reports can be submitted via email
72 or the web:
73
74 bioperl-bugs@bio.perl.org
75 http://bugzilla.bioperl.org/
76
77 =head1 AUTHOR - Chad Matsalla
78
79 Email bioinformatics@dieselwurks.com
80
81 =head1 APPENDIX
82
83 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
84
85 =cut
86
87
88 # Let the code begin...
89
90 package Bio::Seq::PrimaryQual;
91 use vars qw(@ISA %valid_type);
92 use strict;
93
94 use Bio::Root::Root;
95 use Bio::Seq::QualI;
96
97 @ISA = qw(Bio::Root::Root Bio::Seq::QualI);
98
99
100 =head2 new()
101
102 Title : new()
103 Usage : $qual = Bio::Seq::PrimaryQual->new
104 ( -qual => '10 20 30 40 50 50 20 10',
105 -id => 'human_id',
106 -accession_number => 'AL000012',
107 );
108
109 Function: Returns a new Bio::Seq::PrimaryQual object from basic
110 constructors, being a string _or_ a reference to an array for the
111 sequence and strings for id and accession_number. Note that you
112 can provide an empty quality string.
113 Returns : a new Bio::Seq::PrimaryQual object
114
115 =cut
116
117
118
119
120 sub new {
121 my ($class, @args) = @_;
122 my $self = $class->SUPER::new(@args);
123
124 # default: turn ON the warnings (duh)
125 my($qual,$id,$acc,$pid,$desc,$given_id) =
126 $self->_rearrange([qw(QUAL
127 DISPLAY_ID
128 ACCESSION_NUMBER
129 PRIMARY_ID
130 DESC
131 ID
132 )],
133 @args);
134 if( defined $id && defined $given_id ) {
135 if( $id ne $given_id ) {
136 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
137 }
138 }
139 if( defined $given_id ) { $id = $given_id; }
140
141 # note: the sequence string may be empty
142 $self->qual($qual ? $qual : []);
143 $id && $self->display_id($id);
144 $acc && $self->accession_number($acc);
145 $pid && $self->primary_id($pid);
146 $desc && $self->desc($desc);
147
148 return $self;
149 }
150
151 =head2 qual()
152
153 Title : qual()
154 Usage : @quality_values = @{$obj->qual()};
155 Function: Returns the quality as a reference to an array containing the
156 quality values. The individual elements of the quality array are
157 not validated and can be any numeric value.
158 Returns : A reference to an array.
159
160 =cut
161
162 sub qual {
163 my ($self,$value) = @_;
164
165 if( ! defined $value || length($value) == 0 ) {
166 $self->{'qual'} ||= [];
167 } elsif( ref($value) =~ /ARRAY/i ) {
168 # if the user passed in a reference to an array
169 $self->{'qual'} = $value;
170 } elsif(! $self->validate_qual($value)){
171 $self->throw("Attempting to set the quality to [$value] which does not look healthy");
172 } else {
173 $self->{'qual'} = [split(/\s+/,$value)];
174 }
175
176 return $self->{'qual'};
177 }
178
179 =head2 validate_qual($qualstring)
180
181 Title : validate_qual($qualstring)
182 Usage : print("Valid.") if { &validate_qual($self,$qualities); }
183 Function: Make sure that the quality, if it has length > 0, contains at
184 least one digit. Note that quality strings are parsed into arrays
185 using split/\d+/,$quality_string, so make sure that your quality
186 scalar looks like this if you want it to be parsed properly.
187 Returns : 1 for a valid sequence (WHY? Shouldn\'t it return 0? <boggle>)
188 Args : a scalar (any scalar, why PrimarySeq author?) and a scalar
189 containing the string to validate.
190
191 =cut
192
193 sub validate_qual {
194 # how do I validate quality values?
195 # \d+\s+\d+..., I suppose
196 my ($self,$qualstr) = @_;
197 # why the CORE?? -- (Because Bio::PrimarySeqI namespace has a
198 # length method, you have to qualify
199 # which length to use)
200 return 0 if (!defined $qualstr || CORE::length($qualstr) <= 0);
201 return 1 if( $qualstr =~ /\d/);
202
203 return 0;
204 }
205
206 =head2 subqual($start,$end)
207
208 Title : subqual($start,$end)
209 Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
210 Function: returns the quality values from $start to $end, where the
211 first value is 1 and the number is inclusive, ie 1-2 are the
212 first two bases of the sequence. Start cannot be larger than
213 end but can be equal.
214 Returns : A reference to an array.
215 Args : a start position and an end position
216
217 =cut
218
219
220 sub subqual {
221 my ($self,$start,$end) = @_;
222
223 if( $start > $end ){
224 $self->throw("in subqual, start [$start] has to be greater than end [$end]");
225 }
226
227 if( $start <= 0 || $end > $self->length ) {
228 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
229 }
230
231 # remove one from start, and then length is end-start
232
233 $start--;
234 $end--;
235 my @sub_qual_array = @{$self->{qual}}[$start..$end];
236
237 # return substr $self->seq(), $start, ($end-$start);
238 return \@sub_qual_array;
239
240 }
241
242 =head2 display_id()
243
244 Title : display_id()
245 Usage : $id_string = $obj->display_id();
246 Function: returns the display id, aka the common name of the Quality
247 object.
248 The semantics of this is that it is the most likely string to be
249 used as an identifier of the quality sequence, and likely to have
250 "human" readability. The id is equivalent to the ID field of the
251 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
252 database. In fasta format, the >(\S+) is presumed to be the id,
253 though some people overload the id to embed other information.
254 Bioperl does not use any embedded information in the ID field,
255 and people are encouraged to use other mechanisms (accession
256 field for example, or extending the sequence object) to solve
257 this. Notice that $seq->id() maps to this function, mainly for
258 legacy/convience issues
259 Returns : A string
260 Args : None
261
262 =cut
263
264 sub display_id {
265 my ($obj,$value) = @_;
266 if( defined $value) {
267 $obj->{'display_id'} = $value;
268 }
269 return $obj->{'display_id'};
270
271 }
272
273 =head2 accession_number()
274
275 Title : accession_number()
276 Usage : $unique_biological_key = $obj->accession_number();
277 Function: Returns the unique biological id for a sequence, commonly
278 called the accession_number. For sequences from established
279 databases, the implementors should try to use the correct
280 accession number. Notice that primary_id() provides the unique id
281 for the implemetation, allowing multiple objects to have the same
282 accession number in a particular implementation. For sequences
283 with no accession number, this method should return "unknown".
284 Returns : A string
285 Args : None
286
287 =cut
288
289 sub accession_number {
290 my( $obj, $acc ) = @_;
291
292 if (defined $acc) {
293 $obj->{'accession_number'} = $acc;
294 } else {
295 $acc = $obj->{'accession_number'};
296 $acc = 'unknown' unless defined $acc;
297 }
298 return $acc;
299 }
300
301 =head2 primary_id()
302
303 Title : primary_id()
304 Usage : $unique_implementation_key = $obj->primary_id();
305 Function: Returns the unique id for this object in this implementation.
306 This allows implementations to manage their own object ids in a
307 way the implementaiton can control clients can expect one id to
308 map to one object. For sequences with no accession number, this
309 method should return a stringified memory location.
310 Returns : A string
311 Args : None
312
313 =cut
314
315 sub primary_id {
316 my ($obj,$value) = @_;
317 if( defined $value) {
318 $obj->{'primary_id'} = $value;
319 }
320 return $obj->{'primary_id'};
321
322 }
323
324 =head2 desc()
325
326 Title : desc()
327 Usage : $qual->desc($newval);
328 $description = $qual->desc();
329 Function: Get/set description text for a qual object
330 Example :
331 Returns : Value of desc
332 Args : newvalue (optional)
333
334 =cut
335
336 sub desc {
337 my ($obj,$value) = @_;
338 if( defined $value) {
339 $obj->{'desc'} = $value;
340 }
341 return $obj->{'desc'};
342
343 }
344
345 =head2 id()
346
347 Title : id()
348 Usage : $id = $qual->id();
349 Function: Return the ID of the quality. This should normally be (and
350 actually is in the implementation provided here) just a synonym
351 for display_id().
352 Returns : A string.
353 Args : None.
354
355 =cut
356
357 sub id {
358 my ($self,$value) = @_;
359 if( defined $value ) {
360 return $self->display_id($value);
361 }
362 return $self->display_id();
363 }
364
365 =head2 length()
366
367 Title : length()
368 Usage : $length = $qual->length();
369 Function: Return the length of the array holding the quality values.
370 Under most circumstances, this should match the number of quality
371 values but no validation is done when the PrimaryQual object is
372 constructed and non-digits could be put into this array. Is this
373 a bug? Just enough rope...
374 Returns : A scalar (the number of elements in the quality array).
375 Args : None.
376
377 =cut
378
379 sub length {
380 my $self = shift;
381 if (ref($self->{qual}) ne "ARRAY") {
382 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen.");
383 }
384 return scalar(@{$self->{qual}});
385 }
386
387 =head2 qualat($position)
388
389 Title : qualat($position)
390 Usage : $quality = $obj->qualat(10);
391 Function: Return the quality value at the given location, where the
392 first value is 1 and the number is inclusive, ie 1-2 are the first
393 two bases of the sequence. Start cannot be larger than end but can
394 be equal.
395 Returns : A scalar.
396 Args : A position.
397
398 =cut
399
400 sub qualat {
401 my ($self,$val) = @_;
402 my @qualat = @{$self->subqual($val,$val)};
403 if (scalar(@qualat) == 1) {
404 return $qualat[0];
405 }
406 else {
407 $self->throw("AAAH! qualat provided more then one quality.");
408 }
409 }
410
411 =head2 to_string()
412
413 Title : to_string()
414 Usage : $quality = $obj->to_string();
415 Function: Return a textual representation of what the object contains.
416 For this module, this function will return:
417 qual
418 display_id
419 accession_number
420 primary_id
421 desc
422 id
423 length
424 Returns : A scalar.
425 Args : None.
426
427 =cut
428
429 sub to_string {
430 my ($self,$out,$result) = shift;
431 $out = "qual: ".join(',',@{$self->qual()});
432 foreach (qw(display_id accession_number primary_id desc id)) {
433 $result = $self->$_();
434 if (!$result) { $result = "<unset>"; }
435 $out .= "$_: $result\n";
436 }
437 return $out;
438 }
439
440
441 sub to_string_automatic {
442 my ($self,$sub_result,$out) = shift;
443 foreach (sort keys %$self) {
444 print("Working on $_\n");
445 eval { $self->$_(); };
446 if ($@) { $sub_result = ref($_); }
447 elsif (!($sub_result = $self->$_())) {
448 $sub_result = "<unset>";
449 }
450 if (ref($sub_result) eq "ARRAY") {
451 print("This thing ($_) is an array!\n");
452 $sub_result = join(',',@$sub_result);
453 }
454 $out .= "$_: ".$sub_result."\n";
455 }
456 return $out;
457 }
458
459 1;