comparison variant_effect_predictor/Bio/Align/AlignI.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: AlignI.pm,v 1.7 2002/10/22 07:45:10 lapp Exp $
2 #
3 # BioPerl module for Bio::Align::AlignI
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Align::AlignI - An interface for describing sequence alignments.
16
17 =head1 SYNOPSIS
18
19 # get a Bio::Align::AlignI somehow - typically using Bio::AlignIO system
20 # some descriptors
21 print $aln->length, "\n";
22 print $aln->no_residues, "\n";
23 print $aln->is_flush, "\n";
24 print $aln->no_sequences, "\n";
25 print $aln->percentage_identity, "\n";
26 print $aln->consensus_string(50), "\n";
27
28 # find the position in the alignment for a sequence location
29 $pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6;
30
31 # extract sequences and check values for the alignment column $pos
32 foreach $seq ($aln->each_seq) {
33 $res = $seq->subseq($pos, $pos);
34 $count{$res}++;
35 }
36 foreach $res (keys %count) {
37 printf "Res: %s Count: %2d\n", $res, $count{$res};
38 }
39
40 =head1 DESCRIPTION
41
42 This interface describes the basis for alignment objects.
43
44 =head1 FEEDBACK
45
46 =head2 Mailing Lists
47
48 User feedback is an integral part of the evolution of this and other
49 Bioperl modules. Send your comments and suggestions preferably to
50 the Bioperl mailing list. Your participation is much appreciated.
51
52 bioperl-l@bioperl.org - General discussion
53 http://bioperl.org/MailList.shtml - About the mailing lists
54
55 =head2 Reporting Bugs
56
57 Report bugs to the Bioperl bug tracking system to help us keep track
58 of the bugs and their resolution. Bug reports can be submitted via
59 email or the web:
60
61 bioperl-bugs@bioperl.org
62 http://bugzilla.bioperl.org/
63
64 =head1 AUTHOR - Jason Stajich
65
66 Email jason@bioperl.org
67
68 =head1 CONTRIBUTORS
69
70 Ewan Birney, birney@ebi.ac.uk
71 Heikki Lehvaslaiho, heikki@ebi.ac.uk
72
73 =head1 APPENDIX
74
75 The rest of the documentation details each of the object methods.
76 Internal methods are usually preceded with a _
77
78 =cut
79
80
81 # Let the code begin...
82
83
84 package Bio::Align::AlignI;
85 use vars qw(@ISA);
86 use strict;
87
88 use Bio::Root::RootI;
89
90 @ISA = qw(Bio::Root::RootI);
91
92 =head1 Modifier methods
93
94 These methods modify the MSE by adding, removing or shuffling complete
95 sequences.
96
97 =head2 add_seq
98
99 Title : add_seq
100 Usage : $myalign->add_seq($newseq);
101 Function : Adds another sequence to the alignment. *Does not* align
102 it - just adds it to the hashes.
103 Returns : nothing
104 Argument : a Bio::LocatableSeq object
105 order (optional)
106
107 See L<Bio::LocatableSeq> for more information.
108
109 =cut
110
# add_seq is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub add_seq {
    my $self = shift;
    return $self->throw_not_implemented();
}
115
116 =head2 remove_seq
117
118 Title : remove_seq
119 Usage : $aln->remove_seq($seq);
120 Function : Removes a single sequence from an alignment
121 Returns :
122 Argument : a Bio::LocatableSeq object
123
124 =cut
125
# remove_seq is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub remove_seq {
    my $self = shift;
    return $self->throw_not_implemented();
}
130
131 =head2 purge
132
133 Title : purge
134 Usage : $aln->purge(0.7);
135 Function:
136
137 Removes sequences above whatever %id.
138
139 This function will grind on large alignments. Beware!
140 (perhaps not ideally implemented)
141
142 Example :
143 Returns : An array of the removed sequences
144 Argument:
145
146
147 =cut
148
# purge is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub purge {
    my $self = shift;
    return $self->throw_not_implemented();
}
153
154 =head2 sort_alphabetically
155
156 Title : sort_alphabetically
157 Usage : $ali->sort_alphabetically
158 Function :
159
160 Changes the order of the alignment to alphabetical on name
161 followed by numerical by number.
162
163 Returns :
164 Argument :
165
166 =cut
167
# sort_alphabetically is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub sort_alphabetically {
    my $self = shift;
    return $self->throw_not_implemented();
}
172
173 =head1 Sequence selection methods
174
175 Methods returning one or more sequences objects.
176
177 =head2 each_seq
178
179 Title : each_seq
180 Usage : foreach $seq ( $align->each_seq() )
181 Function : Gets an array of Seq objects from the alignment
182 Returns : an array
183 Argument :
184
185 =cut
186
# each_seq is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub each_seq {
    my $self = shift;
    return $self->throw_not_implemented();
}
191
192 =head2 each_alphabetically
193
194 Title : each_alphabetically
195 Usage : foreach $seq ( $ali->each_alphabetically() )
196 Function :
197
198 Returns an array of sequence object sorted alphabetically
199 by name and then by start point.
200 Does not change the order of the alignment
201
202 Returns :
203 Argument :
204
205 =cut
206
# each_alphabetically is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub each_alphabetically {
    my $self = shift;
    return $self->throw_not_implemented();
}
211
212 =head2 each_seq_with_id
213
214 Title : each_seq_with_id
215 Usage : foreach $seq ( $align->each_seq_with_id() )
216 Function :
217
218 Gets an array of Seq objects from the
219 alignment, the contents being those sequences
220 with the given name (there may be more than one)
221
222 Returns : an array
223 Argument : a seq name
224
225 =cut
226
# each_seq_with_id is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub each_seq_with_id {
    my $self = shift;
    return $self->throw_not_implemented();
}
231
232 =head2 get_seq_by_pos
233
234 Title : get_seq_by_pos
235 Usage : $seq = $aln->get_seq_by_pos(3) # third sequence from the alignment
236 Function :
237
238 Gets a sequence based on its position in the alignment.
239 Numbering starts from 1. Sequence positions larger than
240 no_sequences() will throw an error.
241
242 Returns : a Bio::LocatableSeq object
243 Argument : positive integer for the sequence position
244
245 =cut
246
# get_seq_by_pos is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub get_seq_by_pos {
    my $self = shift;
    return $self->throw_not_implemented();
}
251
252 =head1 Create new alignments
253
254 The result of these methods are horizontal or vertical subsets of the
255 current MSE.
256
257 =head2 select
258
259 Title : select
260 Usage : $aln2 = $aln->select(1, 3) # three first sequences
261 Function :
262
263 Creates a new alignment from a continuous subset of
264 sequences. Numbering starts from 1. Sequence positions
265 larger than no_sequences() will throw an error.
266
267 Returns : a Bio::SimpleAlign object
268 Argument : positive integer for the first sequence
269 positive integer for the last sequence to include (optional)
270
271 =cut
272
# select is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
# (As a method it is reached via $aln->select, not the select() builtin.)
sub select {
    my $self = shift;
    return $self->throw_not_implemented();
}
277
278
279 =head2 select_noncont
280
281 Title : select_noncont
282 Usage : $aln2 = $aln->select_noncont(1, 3) # first and 3rd sequences
283 Function :
284
285 Creates a new alignment from a subset of
286 sequences. Numbering starts from 1. Sequence positions
287 larger than no_sequences() will throw an error.
288
289 Returns : a Bio::SimpleAlign object
290 Args : array of integers for the sequences
291
292 =cut
293
# select_noncont is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub select_noncont {
    my $self = shift;
    return $self->throw_not_implemented();
}
298
299 =head2 slice
300
301 Title : slice
302 Usage : $aln2 = $aln->slice(20, 30)
303 Function :
304
305 Creates a slice from the alignment inclusive of start and
306 end columns. Sequences with no residues in the slice are
307 excluded from the new alignment and a warning is printed.
308 Slice beyond the length of the sequence does not do
309 padding.
310
311 Returns : a Bio::SimpleAlign object
312 Argument : positive integer for start column
313 positive integer for end column
314
315 =cut
316
# slice is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub slice {
    my $self = shift;
    return $self->throw_not_implemented();
}
321
322 =head1 Change sequences within the MSE
323
324 These methods affect characters in all sequences without changing the
325 alignment.
326
327
328 =head2 map_chars
329
330 Title : map_chars
331 Usage : $ali->map_chars('\.','-')
332 Function :
333
334 Does a s/$arg1/$arg2/ on the sequences. Useful for gap
335 characters
336
337 Notice that the from (arg1) is interpreted as a regex,
338 so be careful about quoting meta characters (e.g.
339 $ali->map_chars('.','-') won't do what you want)
340
341 Returns :
342 Argument : 'from' regexp
343 'to' string
344
345 =cut
346
# map_chars is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub map_chars {
    my $self = shift;
    return $self->throw_not_implemented();
}
351
352 =head2 uppercase
353
354 Title : uppercase()
355 Usage : $ali->uppercase()
356 Function : Sets all the sequences to uppercase
357 Returns :
358 Argument :
359
360 =cut
361
# uppercase is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub uppercase {
    my $self = shift;
    return $self->throw_not_implemented();
}
366
367 =head2 match_line
368
369 Title : match_line()
370 Usage : $align->match_line()
371 Function : Generates a match line - much like consensus string
372 except that a line indicating the '*' for a match.
373 Argument : (optional) Match line characters ('*' by default)
374 (optional) Strong match char (':' by default)
375 (optional) Weak match char ('.' by default)
376
377 =cut
378
# match_line is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub match_line {
    my $self = shift;
    return $self->throw_not_implemented();
}
383
384 =head2 match
385
386 Title : match()
387 Usage : $ali->match()
388 Function :
389
390 Goes through all columns and changes residues that are
391 identical to residue in first sequence to match '.'
392 character. Sets match_char.
393
394 USE WITH CARE: Most MSE formats do not support match
395 characters in sequences, so this is mostly for output
396 only. NEXUS format (Bio::AlignIO::nexus) can handle
397 it.
398
399 Returns : 1
400 Argument : a match character, optional, defaults to '.'
401
402 =cut
403
# match is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub match {
    my $self = shift;
    return $self->throw_not_implemented();
}
408
409 =head2 unmatch
410
411 Title : unmatch()
412 Usage : $ali->unmatch()
413 Function :
414
415 Undoes the effect of method match. Unsets match_char.
416
417 Returns : 1
418 Argument : a match character, optional, defaults to '.'
419
420 =cut
421
# unmatch is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub unmatch {
    my $self = shift;
    return $self->throw_not_implemented();
}
426
427
428 =head1 MSE attributes
429
430 Methods for setting and reading the MSE attributes.
431
432 Note that the methods defining character semantics depend on the user
433 to set them sensibly. They are needed only by certain input/output
434 methods. Unset them by setting to an empty string ('').
435
436 =head2 id
437
438 Title : id
439 Usage : $myalign->id("Ig")
440 Function : Gets/sets the id field of the alignment
441 Returns : An id string
442 Argument : An id string (optional)
443
444 =cut
445
# id is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub id {
    my $self = shift;
    return $self->throw_not_implemented();
}
450
451 =head2 missing_char
452
453 Title : missing_char
454 Usage : $myalign->missing_char("?")
455 Function : Gets/sets the missing_char attribute of the alignment
456 It is generally recommended to set it to 'n' or 'N'
457 for nucleotides and to 'X' for protein.
458 Returns : A missing_char string
459 Argument : A missing_char string (optional)
460
461 =cut
462
# missing_char is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub missing_char {
    my $self = shift;
    return $self->throw_not_implemented();
}
467
468 =head2 match_char
469
470 Title : match_char
471 Usage : $myalign->match_char('.')
472 Function : Gets/sets the match_char attribute of the alignment
473 Returns : A match_char string
474 Argument : A match_char string (optional)
475
476 =cut
477
# match_char is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub match_char {
    my $self = shift;
    return $self->throw_not_implemented();
}
482
483 =head2 gap_char
484
485 Title : gap_char
486 Usage : $myalign->gap_char('-')
487 Function : Gets/sets the gap_char attribute of the alignment
488 Returns : A gap_char string, defaults to '-'
489 Argument : A gap_char string (optional)
490
491 =cut
492
# gap_char is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub gap_char {
    my $self = shift;
    return $self->throw_not_implemented();
}
497
498 =head2 symbol_chars
499
500 Title : symbol_chars
501 Usage : my @symbolchars = $aln->symbol_chars;
502 Function: Returns all the seen symbols (other than gaps)
503 Returns : array of characters that are the seen symbols
504 Argument: boolean to include the gap/missing/match characters
505
506 =cut
507
# symbol_chars is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub symbol_chars {
    my $self = shift;
    return $self->throw_not_implemented();
}
512
513 =head1 Alignment descriptors
514
515 These read only methods describe the MSE in various ways.
516
517
518 =head2 consensus_string
519
520 Title : consensus_string
521 Usage : $str = $ali->consensus_string($threshold_percent)
522 Function : Makes a strict consensus
523 Returns :
524 Argument : Optional threshold ranging from 0 to 100.
525 The consensus residue has to appear at least threshold %
526 of the sequences at a given location, otherwise a '?'
527 character will be placed at that location.
528 (Default value = 0%)
529
530 =cut
531
# consensus_string is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub consensus_string {
    my $self = shift;
    return $self->throw_not_implemented();
}
536
537 =head2 consensus_iupac
538
539 Title : consensus_iupac
540 Usage : $str = $ali->consensus_iupac()
541 Function :
542
543 Makes a consensus using IUPAC ambiguity codes from DNA
544 and RNA. The output is in upper case except when gaps in
545 a column force output to be in lower case.
546
547 Note that if your alignment sequences contain a lot of
548 IUPAC ambiguity codes you often have to manually set
549 alphabet. Bio::PrimarySeq::_guess_type thinks they
550 indicate a protein sequence.
551
552 Returns : consensus string
553 Argument : none
554 Throws : on protein sequences
555
556
557 =cut
558
# consensus_iupac is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub consensus_iupac {
    my $self = shift;
    return $self->throw_not_implemented();
}
563
564 =head2 is_flush
565
566 Title : is_flush
567 Usage : if( $ali->is_flush() )
568 :
569 :
570 Function : Tells you whether the alignment
571 : is flush, ie all of the same length
572 :
573 :
574 Returns : 1 or 0
575 Argument :
576
577 =cut
578
# is_flush is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub is_flush {
    my $self = shift;
    return $self->throw_not_implemented();
}
583
584 =head2 length
585
586 Title : length()
587 Usage : $len = $ali->length()
588 Function : Returns the maximum length of the alignment.
589 To be sure the alignment is a block, use is_flush
590 Returns :
591 Argument :
592
593 =cut
594
# length is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
# (As a method it is reached via $aln->length, not the length() builtin.)
sub length {
    my $self = shift;
    return $self->throw_not_implemented();
}
599
600 =head2 maxdisplayname_length
601
602 Title : maxdisplayname_length
603 Usage : $ali->maxdisplayname_length()
604 Function :
605
606 Gets the maximum length of the displayname in the
607 alignment. Used in writing out various MSE formats.
608
609 Returns : integer
610 Argument :
611
612 =cut
613
# maxname_length is abstract in this interface: an implementing alignment
# class must override it. Kept under its original name so existing callers
# and overriding classes continue to work.
sub maxname_length {
    my ($self) = @_;
    $self->throw_not_implemented();
}

# The POD for this method documents it as maxdisplayname_length
# ($ali->maxdisplayname_length()), but only maxname_length was declared.
# Declare the documented name as an abstract stub too, so a class that
# forgets to implement it gets the not-implemented error rather than a
# bare "Can't locate object method" failure.
sub maxdisplayname_length {
    my ($self) = @_;
    $self->throw_not_implemented();
}
618
619 =head2 no_residues
620
621 Title : no_residues
622 Usage : $no = $ali->no_residues
623 Function : number of residues in total in the alignment
624 Returns : integer
625 Argument :
626
627 =cut
628
# no_residues is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub no_residues {
    my $self = shift;
    return $self->throw_not_implemented();
}
633
634 =head2 no_sequences
635
636 Title : no_sequences
637 Usage : $depth = $ali->no_sequences
638 Function : number of sequence in the sequence alignment
639 Returns : integer
640 Argument : None
641
642 =cut
643
# no_sequences is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub no_sequences {
    my $self = shift;
    return $self->throw_not_implemented();
}
648
649 =head2 percentage_identity
650
651 Title : percentage_identity
652 Usage : $id = $align->percentage_identity
653 Function: The function calculates the percentage identity of the alignment
654 Returns : The percentage identity of the alignment (as defined by the
655 implementation)
656 Argument: None
657
658 =cut
659
# percentage_identity is abstract in this interface: an implementing
# alignment class must override it.
#
# The original called the misspelled "throw_not_implemeneted", so an
# un-overridden call failed with "Can't locate object method ..." instead of
# raising the same not-implemented error as every other stub in this file.
sub percentage_identity {
    my ($self) = @_;
    $self->throw_not_implemented();
}
664
665 =head2 overall_percentage_identity
666
667 Title : overall_percentage_identity
668 Usage : $id = $align->overall_percentage_identity
669 Function: The function calculates the percentage identity of
670 the conserved columns
671 Returns : The percentage identity of the conserved columns
672 Args : None
673
674 =cut
675
# overall_percentage_identity is abstract in this interface: an implementing
# class must override it. Calling it here only raises the not-implemented
# error.
sub overall_percentage_identity {
    my $self = shift;
    return $self->throw_not_implemented();
}
680
681
682 =head2 average_percentage_identity
683
684 Title : average_percentage_identity
685 Usage : $id = $align->average_percentage_identity
686 Function: The function uses a fast method to calculate the average
687 percentage identity of the alignment
688 Returns : The average percentage identity of the alignment
689 Args : None
690
691 =cut
692
# average_percentage_identity is abstract in this interface: an implementing
# class must override it. Calling it here only raises the not-implemented
# error.
sub average_percentage_identity {
    my $self = shift;
    return $self->throw_not_implemented();
}
697
698 =head1 Alignment positions
699
700 Methods to map a sequence position into an alignment column and back.
701 column_from_residue_number() does the former. The latter is really a
702 property of the sequence object and can be done using
703 L<Bio::LocatableSeq::location_from_column>:
704
705 # select somehow a sequence from the alignment, e.g.
706 my $seq = $aln->get_seq_by_pos(1);
707 #$loc is undef or Bio::LocationI object
708 my $loc = $seq->location_from_column(5);
709
710
711 =head2 column_from_residue_number
712
713 Title : column_from_residue_number
714 Usage : $col = $ali->column_from_residue_number( $seqname, $resnumber)
715 Function:
716
717 This function gives the position in the alignment
718 (i.e. column number) of the given residue number in the
719 sequence with the given name. For example, for the
720 alignment
721
722 Seq1/91-97 AC..DEF.GH
723 Seq2/24-30 ACGG.RTY..
724 Seq3/43-51 AC.DDEFGHI
725
726 column_from_residue_number( "Seq1", 94 ) returns 5.
727 column_from_residue_number( "Seq2", 25 ) returns 2.
728 column_from_residue_number( "Seq3", 50 ) returns 9.
729
730 An exception is thrown if the residue number would lie
731 outside the length of the alignment
732 (e.g. column_from_residue_number( "Seq2", 22 )).
733
734 Note: If the parent sequence is represented by more than
735 one alignment sequence and the residue number is present in
736 them, this method finds only the first one.
737
738 Returns : A column number for the position in the alignment of the
739 given residue in the given sequence (1 = first column)
740 Args : A sequence id/name (not a name/start-end)
741 A residue number in the whole sequence (not just that
742 segment of it in the alignment)
743
744 =cut
745
# column_from_residue_number is abstract in this interface: an implementing
# class must override it. Calling it here only raises the not-implemented
# error.
sub column_from_residue_number {
    my $self = shift;
    return $self->throw_not_implemented();
}
750
751 =head1 Sequence names
752
753 Methods to manipulate the display name. The default name based on the
754 sequence id and subsequence positions can be overridden in various
755 ways.
756
757 =head2 displayname
758
759 Title : displayname
760 Usage : $myalign->displayname("Ig", "IgA")
761 Function : Gets/sets the display name of a sequence in the alignment
762 :
763 Returns : A display name string
764 Argument : name of the sequence
765 displayname of the sequence (optional)
766
767 =cut
768
# displayname is abstract in this interface: an implementing alignment class
# must override it. Calling it here only raises the not-implemented error.
sub displayname {
    my $self = shift;
    return $self->throw_not_implemented();
}
773
774 =head2 set_displayname_count
775
776 Title : set_displayname_count
777 Usage : $ali->set_displayname_count
778 Function :
779
780 Sets the names to be name_# where # is the number of
781 times this name has been used.
782
783 Returns : None
784 Argument : None
785
786 =cut
787
# set_displayname_count is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub set_displayname_count {
    my $self = shift;
    return $self->throw_not_implemented();
}
792
793 =head2 set_displayname_flat
794
795 Title : set_displayname_flat
796 Usage : $ali->set_displayname_flat()
797 Function : Makes all the sequences be displayed as just their name,
798 not name/start-end
799 Returns : 1
800 Argument : None
801
802 =cut
803
# set_displayname_flat is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub set_displayname_flat {
    my $self = shift;
    return $self->throw_not_implemented();
}
808
809 =head2 set_displayname_normal
810
811 Title : set_displayname_normal
812 Usage : $ali->set_displayname_normal()
813 Function : Makes all the sequences be displayed as name/start-end
814 Returns : None
815 Argument : None
816
817 =cut
818
# set_displayname_normal is abstract in this interface: an implementing class
# must override it. Calling it here only raises the not-implemented error.
sub set_displayname_normal {
    my $self = shift;
    return $self->throw_not_implemented();
}
823
824 1;