comparison variant_effect_predictor/Bio/Seq/SequenceTrace.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: SequenceTrace.pm,v 1.1.2.1 2003/03/25 12:32:16 heikki Exp $
2 #
3 # BioPerl module for Bio::Seq::SeqWithQuality
4 #
5 # Cared for by Chad Matsalla <bioinformatics@dieselwurks.com
6 #
7 # Copyright Chad Matsalla
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Seq::SequenceTrace - Bioperl object packaging a sequence with its trace
16
17 =head1 SYNOPSIS
18
19 # example code here
20
21 =head1 DESCRIPTION
22
23 This object stores a sequence with its trace.
24
25 =head1 FEEDBACK
26
27 =head2 Mailing Lists
28
29 User feedback is an integral part of the evolution of this and other
30 Bioperl modules. Send your comments and suggestions preferably to one
31 of the Bioperl mailing lists. Your participation is much appreciated.
32
33 bioperl-l@bioperl.org - General discussion
34 http://bio.perl.org/MailList.html - About the mailing lists
35
36 =head2 Reporting Bugs
37
38 Report bugs to the Bioperl bug tracking system to help us keep track
39 the bugs and their resolution. Bug reports can be submitted via email
40 or the web:
41
42 bioperl-bugs@bio.perl.org
43 http://bugzilla.bioperl.org/
44
45 =head1 AUTHOR - Chad Matsalla
46
47 Email bioinformatics@dieselwurks.com
48
49 =head1 CONTRIBUTORS
50
51 Jason Stajich, jason@bioperl.org
52
53 =head1 APPENDIX
54
55 The rest of the documentation details each of the object methods.
56 Internal methods are usually preceded with a _
57
58 =cut
59
60
61 package Bio::Seq::SequenceTrace;
62
63 use vars qw(@ISA);
64
65 use strict;
66 use Bio::Root::Root;
67 use Bio::Seq::QualI;
68 use Bio::PrimarySeqI;
69 use Bio::PrimarySeq;
70 use Bio::Seq::PrimaryQual;
71 use Bio::Seq::TraceI;
72
73 @ISA = qw(Bio::Root::Root Bio::Seq::SeqWithQuality Bio::Seq::TraceI);
74
75 =head2 new()
76
77 Title : new()
78 Usage : $st = Bio::Seq::SequenceTrace->new
79 ( -sequencewithquality => Bio::Seq::SequenceWithQuality,
80 -trace_a => \@trace_values_for_a_channel,
81 -trace_t => \@trace_values_for_t_channel,
82 -trace_g => \@trace_values_for_g_channel,
83 -trace_c => \@trace_values_for_c_channel,
84 -trace_indices => '0 5 10 15 20 25 30 35'
85 );
86 Function: Returns a new Bio::Seq::SequenceTrace object from basic
87 constructors.
88 Returns : a new Bio::Seq::SequenceTrace object
89 Arguments: I think that these are all describes in the usage above.
90
91 =cut
92
93 sub new {
94 my ($class, @args) = @_;
95 my $self = $class->SUPER::new(@args);
96 # default: turn OFF the warnings
97 $self->{supress_warnings} = 1;
98 my($sequence_with_quality,$trace_indices,$trace_a,$trace_t,
99 $trace_g,$trace_c) =
100 $self->_rearrange([qw(
101 SEQUENCEWITHQUALITY
102 TRACE_INDICES
103 TRACE_A
104 TRACE_T
105 TRACE_G)], @args);
106 # first, deal with the sequence and quality information
107 if ($sequence_with_quality && ref($sequence_with_quality) eq "Bio::Seq::SeqWithQuality") {
108 $self->{swq} = $sequence_with_quality;
109 }
110 else {
111 $self->throw("A Bio::Seq::SequenceTrace object must be created with a
112 Bio::Seq::SeqWithQuality object.");
113 }
114 $self->{trace_a} = $trace_a ? $trace_a : undef;
115 $self->{trace_t} = $trace_t ? $trace_t : undef;
116 $self->{trace_g} = $trace_g ? $trace_g : undef;
117 $self->{trace_c} = $trace_c ? $trace_c : undef;
118 $self->{trace_indices} = $trace_indices ? $trace_indices : undef;
119 return $self;
120 }
121
122 =head2 trace($base,\@new_values)
123
124 Title : trace($base,\@new_values)
125 Usage : @trace_Values = @{$obj->trace($base,\@new_values)};
126 Function: Returns the trace values as a reference to an array containing the
127 trace values. The individual elements of the trace array are not validated
128 and can be any numeric value.
129 Returns : A reference to an array.
130 Status :
131 Arguments: $base : which color channel would you like the trace values for?
132 - $base must be one of "A","T","G","C"
133 \@new_values : a reference to an array of values containing trace
134 data for this base
135
136 =cut
137
138 sub trace {
139 my ($self,$base_channel,$values) = @_;
140 $base_channel =~ tr/A-Z/a-z/;
141 if (length($base_channel) > 1 && $base_channel !~ /a|t|g|c/) {
142 $self->throw("The base channel must be a, t, g, or c");
143 }
144 if ( $values && ref($values) eq "ARRAY") {
145 $self->{trace_$base_channel} = $values;
146 }
147 elsif ($values) {
148 $self->warn("You tried to change the traces for the $base_channel but
149 the values you wave were not a reference to an array.");
150 }
151 return $self->{trace_$base_channel};
152 }
153
154
155 =head2 trace_indices($new_indices)
156
157 Title : trace_indices($new_indices)
158 Usage : $indices = $obj->trace_indices($new_indices);
159 Function: Return the trace iindex points for this object.
160 Returns : A scalar
161 Args : If used, the trace indices will be set to the provided value.
162
163 =cut
164
165 sub trace_indices {
166 my ($self,$trace_indices)= @_;
167 if ($trace_indices) { $self->{trace_indices} = $trace_indices; }
168 return $self->{trace_indices};
169 }
170
171
172
173
174
175
176
177
178
179
180
181
182
183 =head2 _common_id()
184
185 Title : _common_id()
186 Usage : $common_id = $self->_common_id();
187 Function: Compare the display_id of {qual_ref} and {seq_ref}.
188 Returns : Nothing if they don't match. If they do return
189 {seq_ref}->display_id()
190 Args : None.
191
192 =cut
193
194 #'
195 sub _common_id {
196 my $self = shift;
197 return if (!$self->{seq_ref} || !$self->{qual_ref});
198 my $sid = $self->{seq_ref}->display_id();
199 return if (!$sid);
200 return if (!$self->{qual_ref}->display_id());
201 return $sid if ($sid eq $self->{qual_ref}->display_id());
202 # should this become a warning?
203 # print("ids $sid and $self->{qual_ref}->display_id() do not match. Bummer.\n");
204 }
205
206 =head2 _common_display_id()
207
208 Title : _common_id()
209 Usage : $common_id = $self->_common_display_id();
210 Function: Compare the display_id of {qual_ref} and {seq_ref}.
211 Returns : Nothing if they don't match. If they do return
212 {seq_ref}->display_id()
213 Args : None.
214
215 =cut
216
217 #'
218 sub _common_display_id {
219 my $self = shift;
220 $self->common_id();
221 }
222
223 =head2 _common_accession_number()
224
225 Title : _common_accession_number()
226 Usage : $common_id = $self->_common_accession_number();
227 Function: Compare the accession_number() of {qual_ref} and {seq_ref}.
228 Returns : Nothing if they don't match. If they do return
229 {seq_ref}->accession_number()
230 Args : None.
231
232 =cut
233
234 #'
235 sub _common_accession_number {
236 my $self = shift;
237 return if ($self->{seq_ref} || $self->{qual_ref});
238 my $acc = $self->{seq_ref}->accession_number();
239 # if (!$acc) { print("the seqref has no acc.\n"); }
240 return if (!$acc);
241 # if ($acc eq $self->{qual_ref}->accession_number()) { print("$acc matches ".$self->{qual_ref}->accession_number()."\n"); }
242 return $acc if ($acc eq $self->{qual_ref}->accession_number());
243 # should this become a warning?
244 # print("accession numbers $acc and $self->{qual_ref}->accession_number() do not match. Bummer.\n");
245 }
246
247 =head2 _common_primary_id()
248
249 Title : _common_primary_id()
250 Usage : $common_primard_id = $self->_common_primary_id();
251 Function: Compare the primary_id of {qual_ref} and {seq_ref}.
252 Returns : Nothing if they don't match. If they do return
253 {seq_ref}->primary_id()
254 Args : None.
255
256 =cut
257
258 #'
259 sub _common_primary_id {
260 my $self = shift;
261 return if ($self->{seq_ref} || $self->{qual_ref});
262 my $pid = $self->{seq_ref}->primary_id();
263 return if (!$pid);
264 return $pid if ($pid eq $self->{qual_ref}->primary_id());
265 # should this become a warning?
266 # print("primary_ids $pid and $self->{qual_ref}->primary_id() do not match. Bummer.\n");
267
268 }
269
270 =head2 _common_desc()
271
272 Title : _common_desc()
273 Usage : $common_desc = $self->_common_desc();
274 Function: Compare the desc of {qual_ref} and {seq_ref}.
275 Returns : Nothing if they don't match. If they do return
276 {seq_ref}->desc()
277 Args : None.
278
279 =cut
280
281 #'
282 sub _common_desc {
283 my $self = shift;
284 return if ($self->{seq_ref} || $self->{qual_ref});
285 my $des = $self->{seq_ref}->desc();
286 return if (!$des);
287 return $des if ($des eq $self->{qual_ref}->desc());
288 # should this become a warning?
289 # print("descriptions $des and $self->{qual_ref}->desc() do not match. Bummer.\n");
290
291 }
292
293 =head2 set_common_descriptors()
294
295 Title : set_common_descriptors()
296 Usage : $self->set_common_descriptors();
297 Function: Compare the descriptors (id,accession_number,display_id,
298 primary_id, desc) for the PrimarySeq and PrimaryQual objects
299 within the SeqWithQuality object. If they match, make that
300 descriptor the descriptor for the SeqWithQuality object.
301 Returns : Nothing.
302 Args : None.
303
304 =cut
305
306 sub set_common_descriptors {
307 my $self = shift;
308 return if ($self->{seq_ref} || $self->{qual_ref});
309 &_common_id();
310 &_common_display_id();
311 &_common_accession_number();
312 &_common_primary_id();
313 &_common_desc();
314 }
315
316 =head2 alphabet()
317
318 Title : alphabet();
319 Usage : $molecule_type = $obj->alphabet();
320 Function: Get the molecule type from the PrimarySeq object.
321 Returns : What what PrimarySeq says the type of the sequence is.
322 Args : None.
323
324 =cut
325
326 sub alphabet {
327 my $self = shift;
328 return $self->{seq_ref}->alphabet();
329 }
330
331 =head2 display_id()
332
333 Title : display_id()
334 Usage : $id_string = $obj->display_id();
335 Function: Returns the display id, aka the common name of the Quality
336 object.
337 The semantics of this is that it is the most likely string to be
338 used as an identifier of the quality sequence, and likely to have
339 "human" readability. The id is equivalent to the ID field of the
340 GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
341 database. In fasta format, the >(\S+) is presumed to be the id,
342 though some people overload the id to embed other information.
343 Bioperl does not use any embedded information in the ID field,
344 and people are encouraged to use other mechanisms (accession
345 field for example, or extending the sequence object) to solve
346 this. Notice that $seq->id() maps to this function, mainly for
347 legacy/convience issues.
348 This method sets the display_id for the SeqWithQuality object.
349 Returns : A string
350 Args : If a scalar is provided, it is set as the new display_id for
351 the SeqWithQuality object.
352 Status : Virtual
353
354 =cut
355
356 sub display_id {
357 my ($obj,$value) = @_;
358 if( defined $value) {
359 $obj->{'display_id'} = $value;
360 }
361 return $obj->{'display_id'};
362
363 }
364
365 =head2 accession_number()
366
367 Title : accession_number()
368 Usage : $unique_biological_key = $obj->accession_number();
369 Function: Returns the unique biological id for a sequence, commonly
370 called the accession_number. For sequences from established
371 databases, the implementors should try to use the correct
372 accession number. Notice that primary_id() provides the unique id
373 for the implemetation, allowing multiple objects to have the same
374 accession number in a particular implementation. For sequences
375 with no accession number, this method should return "unknown".
376 This method sets the accession_number for the SeqWithQuality
377 object.
378 Returns : A string (the value of accession_number)
379 Args : If a scalar is provided, it is set as the new accession_number
380 for the SeqWithQuality object.
381 Status : Virtual
382
383
384 =cut
385
386 sub accession_number {
387 my( $obj, $acc ) = @_;
388
389 if (defined $acc) {
390 $obj->{'accession_number'} = $acc;
391 } else {
392 $acc = $obj->{'accession_number'};
393 $acc = 'unknown' unless defined $acc;
394 }
395 return $acc;
396 }
397
398 =head2 primary_id()
399
400 Title : primary_id()
401 Usage : $unique_implementation_key = $obj->primary_id();
402 Function: Returns the unique id for this object in this implementation.
403 This allows implementations to manage their own object ids in a
404 way the implementaiton can control clients can expect one id to
405 map to one object. For sequences with no accession number, this
406 method should return a stringified memory location.
407 This method sets the primary_id for the SeqWithQuality
408 object.
409 Returns : A string. (the value of primary_id)
410 Args : If a scalar is provided, it is set as the new primary_id for
411 the SeqWithQuality object.
412
413 =cut
414
415 sub primary_id {
416 my ($obj,$value) = @_;
417 if ($value) {
418 $obj->{'primary_id'} = $value;
419 }
420 return $obj->{'primary_id'};
421
422 }
423
424 =head2 desc()
425
426 Title : desc()
427 Usage : $qual->desc($newval); _or_
428 $description = $qual->desc();
429 Function: Get/set description text for this SeqWithQuality object.
430 Returns : A string. (the value of desc)
431 Args : If a scalar is provided, it is set as the new desc for the
432 SeqWithQuality object.
433
434 =cut
435
436 sub desc {
437 # a mechanism to set the disc for the SeqWithQuality object.
438 # probably will be used most often by set_common_features()
439 my ($obj,$value) = @_;
440 if( defined $value) {
441 $obj->{'desc'} = $value;
442 }
443 return $obj->{'desc'};
444 }
445
446 =head2 id()
447
448 Title : id()
449 Usage : $id = $qual->id();
450 Function: Return the ID of the quality. This should normally be (and
451 actually is in the implementation provided here) just a synonym
452 for display_id().
453 Returns : A string. (the value of id)
454 Args : If a scalar is provided, it is set as the new id for the
455 SeqWithQuality object.
456
457 =cut
458
459 sub id {
460 my ($self,$value) = @_;
461 if (!$self) { $self->throw("no value for self in $value"); }
462 if( defined $value ) {
463 return $self->display_id($value);
464 }
465 return $self->display_id();
466 }
467
468 =head2 seq
469
470 Title : seq()
471 Usage : $string = $obj->seq(); _or_
472 $obj->seq("atctatcatca");
473 Function: Returns the sequence that is contained in the imbedded in the
474 PrimarySeq object within the SeqWithQuality object
475 Returns : A scalar (the seq() value for the imbedded PrimarySeq object.)
476 Args : If a scalar is provided, the SeqWithQuality object will
477 attempt to set that as the sequence for the imbedded PrimarySeq
478 object. Otherwise, the value of seq() for the PrimarySeq object
479 is returned.
480 Notes : This is probably not a good idea because you then should call
481 length() to make sure that the sequence and quality are of the
482 same length. Even then, how can you make sure that this sequence
483 belongs with that quality? I provided this to give you rope to
484 hang yourself with. Tie it to a strong device and use a good
485 knot.
486
487 =cut
488
489 sub seq {
490 my ($self,$value) = @_;
491 if( defined $value) {
492 $self->{seq_ref}->seq($value);
493 $self->length();
494 }
495 return $self->{seq_ref}->seq();
496 }
497
498 =head2 qual()
499
500 Title : qual()
501 Usage : @quality_values = @{$obj->qual()}; _or_
502 $obj->qual("10 10 20 40 50");
503 Function: Returns the quality as imbedded in the PrimaryQual object
504 within the SeqWithQuality object.
505 Returns : A reference to an array containing the quality values in the
506 PrimaryQual object.
507 Args : If a scalar is provided, the SeqWithQuality object will
508 attempt to set that as the quality for the imbedded PrimaryQual
509 object. Otherwise, the value of qual() for the PrimaryQual
510 object is returned.
511 Notes : This is probably not a good idea because you then should call
512 length() to make sure that the sequence and quality are of the
513 same length. Even then, how can you make sure that this sequence
514 belongs with that quality? I provided this to give you a strong
515 board with which to flagellate yourself.
516
517 =cut
518
519 sub qual {
520 my ($self,$value) = @_;
521
522 if( defined $value) {
523 $self->{qual_ref}->qual($value);
524 # update the lengths
525 $self->length();
526 }
527 return $self->{qual_ref}->qual();
528 }
529
530
531
532
533 =head2 length()
534
535 Title : length()
536 Usage : $length = $seqWqual->length();
537 Function: Get the length of the SeqWithQuality sequence/quality.
538 Returns : Returns the length of the sequence and quality if they are
539 both the same. Returns "DIFFERENT" if they differ.
540 Args : None.
541
542 =cut
543
544 sub length {
545 my $self = shift;
546 # what do I return here? Whew. Ambiguity...
547 ########
548
549 }
550
551
552 =head2 qual_obj
553
554 Title : qual_obj($different_obj)
555 Usage : $qualobj = $seqWqual->qual_obj(); _or_
556 $qualobj = $seqWqual->qual_obj($ref_to_primaryqual_obj);
557 Function: Get the PrimaryQual object that is imbedded in the
558 SeqWithQuality object or if a reference to a PrimaryQual object
559 is provided, set this as the PrimaryQual object imbedded in the
560 SeqWithQuality object.
561 Returns : A reference to a Bio::Seq::SeqWithQuality object.
562
563 =cut
564
565 sub qual_obj {
566 my ($self,$value) = @_;
567 return $self->{swq}->qual_obj($value);
568 }
569
570
571 =head2 seq_obj
572
573 Title : seq_obj()
574 Usage : $seqobj = $seqWqual->qual_obj(); _or_
575 $seqobj = $seqWqual->seq_obj($ref_to_primary_seq_obj);
576 Function: Get the PrimarySeq object that is imbedded in the
577 SeqWithQuality object or if a reference to a PrimarySeq object is
578 provided, set this as the PrimarySeq object imbedded in the
579 SeqWithQuality object.
580 Returns : A reference to a Bio::PrimarySeq object.
581
582 =cut
583
584 sub seq_obj {
585 my ($self,$value) = @_;
586 return $self->{swq}->seq_obj($value);
587 }
588
589 =head2 _set_descriptors
590
591 Title : _set_descriptors()
592 Usage : $seqWqual->_qual_obj($qual,$seq,$id,$acc,$pid,$desc,$given_id,
593 $alphabet);
594 Function: Set the descriptors for the SeqWithQuality object. Try to
595 match the descriptors in the PrimarySeq object and in the
596 PrimaryQual object if descriptors were not provided with
597 construction.
598 Returns : Nothing.
599 Args : $qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet as found
600 in the new() method.
601 Notes : Really only intended to be called by the new() method. If
602 you want to invoke a similar function try
603 set_common_descriptors().
604
605 =cut
606
607
608 sub _set_descriptors {
609 my ($self,$qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet) = @_;
610 $self->{swq}->_seq_descriptors($qual,$seq,$id,$acc,$pid,$desc,$given_id,$alphabet);
611 }
612
613 =head2 subseq($start,$end)
614
615 Title : subseq($start,$end)
616 Usage : $subsequence = $obj->subseq($start,$end);
617 Function: Returns the subseq from start to end, where the first base
618 is 1 and the number is inclusive, ie 1-2 are the first two
619 bases of the sequence.
620 Returns : A string.
621 Args : Two positions.
622
623 =cut
624
625 sub subseq {
626 my ($self,@args) = @_;
627 # does a single value work?
628 return $self->{swq}->subseq(@args);
629 }
630
631 =head2 baseat($position)
632
633 Title : baseat($position)
634 Usage : $base_at_position_6 = $obj->baseat("6");
635 Function: Returns a single base at the given position, where the first
636 base is 1 and the number is inclusive, ie 1-2 are the first two
637 bases of the sequence.
638 Returns : A scalar.
639 Args : A position.
640
641 =cut
642
643 sub baseat {
644 my ($self,$val) = @_;
645 return $self->{swq}->subseq($val,$val);
646 }
647
648 =head2 subqual($start,$end)
649
650 Title : subqual($start,$end)
651 Usage : @qualities = @{$obj->subqual(10,20);
652 Function: returns the quality values from $start to $end, where the
653 first value is 1 and the number is inclusive, ie 1-2 are the
654 first two bases of the sequence. Start cannot be larger than
655 end but can be equal.
656 Returns : A reference to an array.
657 Args : a start position and an end position
658
659 =cut
660
661 sub subqual {
662 my ($self,@args) = @_;
663 return $self->{swq}->subqual(@args);
664 }
665
666 =head2 qualat($position)
667
668 Title : qualat($position)
669 Usage : $quality = $obj->qualat(10);
670 Function: Return the quality value at the given location, where the
671 first value is 1 and the number is inclusive, ie 1-2 are the
672 first two bases of the sequence. Start cannot be larger than
673 end but can be equal.
674 Returns : A scalar.
675 Args : A position.
676
677 =cut
678
679 sub qualat {
680 my ($self,$val) = @_;
681 return $self->{swq}->qualat($val);
682 }
683
684 =head2 sub_trace_index($start,$end)
685
686 Title : sub_trace_index($start,$end)
687 Usage : @trace_indices = @{$obj->sub_trace_index(10,20);
688 Function: returns the trace index values from $start to $end, where the
689 first value is 1 and the number is inclusive, ie 1-2 are the
690 first two bases of the sequence. Start cannot be larger than
691 end but can be e_trace_index.
692 Returns : A reference to an array.
693 Args : a start position and an end position
694
695 =cut
696
697 sub sub_trace_index {
698 my ($self,$start,$end) = @_;
699
700 if( $start > $end ){
701 $self->throw("in sub_trace_index, start [$start] has to be greater than end [$end]");
702 }
703
704 if( $start <= 0 || $end > $self->length ) {
705 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
706 }
707
708 # remove one from start, and then length is end-start
709
710 $start--;
711 $end--;
712 my @sub_trace_index_array = @{$self->{trace_indices}}[$start..$end];
713
714 # return substr $self->seq(), $start, ($end-$start);
715 return \@sub_trace_index_array;
716
717 }
718
719
720
721
722 =head2 trace_index_at($position)
723
724 Title : trace_index_at($position)
725 Usage : $trace_index = $obj->trace_index_at(10);
726 Function: Return the trace_index value at the given location, where the
727 first value is 1 and the number is inclusive, ie 1-2 are the
728 first two bases of the sequence. Start cannot be larger than
729 end but can be etrace_index_.
730 Returns : A scalar.
731 Args : A position.
732
733 =cut
734
735 sub trace_index_at {
736 my ($self,$val) = @_;
737 my @trace_index_at = @{$self->sub_trace_index($val,$val)};
738 if (scalar(@trace_index_at) == 1) {
739 return $trace_index_at[0];
740 }
741 else {
742 $self->throw("AAAH! trace_index_at provided more then one quality.");
743 }
744 }
745
746
747 1;