Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/Align/AlignI.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Align/AlignI.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,824 @@ +# $Id: AlignI.pm,v 1.7 2002/10/22 07:45:10 lapp Exp $ +# +# BioPerl module for Bio::Align::AlignI +# +# Cared for by Jason Stajich <jason@bioperl.org> +# +# Copyright Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Align::AlignI - An interface for describing sequence alignments. + +=head1 SYNOPSIS + + # get a Bio::Align::AlignI somehow - typically using Bio::AlignIO system + # some descriptors + print $aln->length, "\n"; + print $aln->no_residues, "\n"; + print $aln->is_flush, "\n"; + print $aln->no_sequences, "\n"; + print $aln->percentage_identity, "\n"; + print $aln->consensus_string(50), "\n"; + + # find the position in the alignment for a sequence location + $pos = $aln->column_from_residue_number('1433_LYCES', 14); # = 6; + + # extract sequences and check values for the alignment column $pos + foreach $seq ($aln->each_seq) { + $res = $seq->subseq($pos, $pos); + $count{$res}++; + } + foreach $res (keys %count) { + printf "Res: %s Count: %2d\n", $res, $count{$res}; + } + +=head1 DESCRIPTION + +This interface describes the basis for alignment objects. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. 
Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Jason Stajich + +Email jason@bioperl.org + +=head1 CONTRIBUTORS + +Ewan Birney, birney@ebi.ac.uk +Heikki Lehvaslaiho, heikki@ebi.ac.uk + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Align::AlignI; +use vars qw(@ISA); +use strict; + +use Bio::Root::RootI; + +@ISA = qw(Bio::Root::RootI); + +=head1 Modifier methods + +These methods modify the MSE by adding, removing or shuffling complete +sequences. + +=head2 add_seq + + Title : add_seq + Usage : $myalign->add_seq($newseq); + Function : Adds another sequence to the alignment. *Does not* align + it - just adds it to the hashes. + Returns : nothing + Argument : a Bio::LocatableSeq object + order (optional) + +See L<Bio::LocatableSeq> for more information. + +=cut + +sub add_seq { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 remove_seq + + Title : remove_seq + Usage : $aln->remove_seq($seq); + Function : Removes a single sequence from an alignment + Returns : + Argument : a Bio::LocatableSeq object + +=cut + +sub remove_seq { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 purge + + Title : purge + Usage : $aln->purge(0.7); + Function: + + Removes sequences above whatever %id. + + This function will grind on large alignments. Beware! + (perhaps not ideally implemented) + + Example : + Returns : An array of the removed sequences + Argument: + + +=cut + +sub purge { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 sort_alphabetically + + Title : sort_alphabetically + Usage : $ali->sort_alphabetically + Function : + + Changes the order of the alignment to alphabetical on name + followed by numerical by number. 
+ + Returns : + Argument : + +=cut + +sub sort_alphabetically { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head1 Sequence selection methods + +Methods returning one or more sequence objects. + +=head2 each_seq + + Title : each_seq + Usage : foreach $seq ( $align->each_seq() ) + Function : Gets an array of Seq objects from the alignment + Returns : an array + Argument : + +=cut + +sub each_seq { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 each_alphabetically + + Title : each_alphabetically + Usage : foreach $seq ( $ali->each_alphabetically() ) + Function : + + Returns an array of sequence objects sorted alphabetically + by name and then by start point. + Does not change the order of the alignment + + Returns : + Argument : + +=cut + +sub each_alphabetically { + my($self) = @_; + $self->throw_not_implemented(); +} + +=head2 each_seq_with_id + + Title : each_seq_with_id + Usage : foreach $seq ( $align->each_seq_with_id($seqname) ) + Function : + + Gets an array of Seq objects from the + alignment, the contents being those sequences + with the given name (there may be more than one) + + Returns : an array + Argument : a seq name + +=cut + +sub each_seq_with_id { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 get_seq_by_pos + + Title : get_seq_by_pos + Usage : $seq = $aln->get_seq_by_pos(3) # third sequence from the alignment + Function : + + Gets a sequence based on its position in the alignment. + Numbering starts from 1. Sequence positions larger than + no_sequences() will throw an error. + + Returns : a Bio::LocatableSeq object + Argument : positive integer for the sequence position + +=cut + +sub get_seq_by_pos { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head1 Create new alignments + +The results of these methods are horizontal or vertical subsets of the +current MSE. 
+ +=head2 select + + Title : select + Usage : $aln2 = $aln->select(1, 3) # first three sequences + Function : + + Creates a new alignment from a continuous subset of + sequences. Numbering starts from 1. Sequence positions + larger than no_sequences() will throw an error. + + Returns : a Bio::SimpleAlign object + Argument : positive integer for the first sequence + positive integer for the last sequence to include (optional) + +=cut + +sub select { + my ($self) = @_; + $self->throw_not_implemented(); +} + + +=head2 select_noncont + + Title : select_noncont + Usage : $aln2 = $aln->select_noncont(1, 3) # first and 3rd sequences + Function : + + Creates a new alignment from a subset of + sequences. Numbering starts from 1. Sequence positions + larger than no_sequences() will throw an error. + + Returns : a Bio::SimpleAlign object + Args : array of integers for the sequences + +=cut + +sub select_noncont { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 slice + + Title : slice + Usage : $aln2 = $aln->slice(20, 30) + Function : + + Creates a slice from the alignment inclusive of start and + end columns. Sequences with no residues in the slice are + excluded from the new alignment and a warning is printed. + Slice beyond the length of the sequence does not do + padding. + + Returns : a Bio::SimpleAlign object + Argument : positive integer for start column + positive integer for end column + +=cut + +sub slice { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head1 Change sequences within the MSE + +These methods affect characters in all sequences without changing the +alignment. + + +=head2 map_chars + + Title : map_chars + Usage : $ali->map_chars('\.','-') + Function : + + Does a s/$arg1/$arg2/ on the sequences. 
Useful for gap + characters + + Notice that the from (arg1) is interpreted as a regex, + so be careful about quoting meta characters (eg + $ali->map_chars('.','-') won't do what you want) + + Returns : + Argument : 'from' regexp + 'to' string + +=cut + +sub map_chars { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 uppercase + + Title : uppercase() + Usage : $ali->uppercase() + Function : Sets all the sequences to uppercase + Returns : + Argument : + +=cut + +sub uppercase { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 match_line + + Title : match_line() + Usage : $align->match_line() + Function : Generates a match line - much like a consensus string + except that the line uses '*' to indicate a match. + Argument : (optional) Match line characters ('*' by default) + (optional) Strong match char (':' by default) + (optional) Weak match char ('.' by default) + +=cut + +sub match_line { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 match + + Title : match() + Usage : $ali->match() + Function : + + Goes through all columns and changes residues that are + identical to residue in first sequence to match '.' + character. Sets match_char. + + USE WITH CARE: Most MSE formats do not support match + characters in sequences, so this is mostly for output + only. NEXUS format (Bio::AlignIO::nexus) can handle + it. + + Returns : 1 + Argument : a match character, optional, defaults to '.' + +=cut + +sub match { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 unmatch + + Title : unmatch() + Usage : $ali->unmatch() + Function : + + Undoes the effect of method match. Unsets match_char. + + Returns : 1 + Argument : a match character, optional, defaults to '.' + +=cut + +sub unmatch { + my ($self) = @_; + $self->throw_not_implemented(); +} + + +=head1 MSE attributes + +Methods for setting and reading the MSE attributes. 
+ +Note that the methods defining character semantics depend on the user +to set them sensibly. They are needed only by certain input/output +methods. Unset them by setting to an empty string (''). + +=head2 id + + Title : id + Usage : $myalign->id("Ig") + Function : Gets/sets the id field of the alignment + Returns : An id string + Argument : An id string (optional) + +=cut + +sub id { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 missing_char + + Title : missing_char + Usage : $myalign->missing_char("?") + Function : Gets/sets the missing_char attribute of the alignment + It is generally recommended to set it to 'n' or 'N' + for nucleotides and to 'X' for protein. + Returns : A missing_char string + Argument : A missing_char string (optional) + +=cut + +sub missing_char { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 match_char + + Title : match_char + Usage : $myalign->match_char('.') + Function : Gets/sets the match_char attribute of the alignment + Returns : A match_char string + Argument : A match_char string (optional) + +=cut + +sub match_char { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 gap_char + + Title : gap_char + Usage : $myalign->gap_char('-') + Function : Gets/sets the gap_char attribute of the alignment + Returns : A gap_char string, defaults to '-' + Argument : A gap_char string (optional) + +=cut + +sub gap_char { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 symbol_chars + + Title : symbol_chars + Usage : my @symbolchars = $aln->symbol_chars; + Function: Returns all the seen symbols (other than gaps) + Returns : array of characters that are the seen symbols + Argument: boolean to include the gap/missing/match characters + +=cut + +sub symbol_chars{ + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head1 Alignment descriptors + +These read-only methods describe the MSE in various ways. 
+ + +=head2 consensus_string + + Title : consensus_string + Usage : $str = $ali->consensus_string($threshold_percent) + Function : Makes a strict consensus + Returns : + Argument : Optional threshold ranging from 0 to 100. + The consensus residue has to appear in at least threshold % + of the sequences at a given location, otherwise a '?' + character will be placed at that location. + (Default value = 0%) + +=cut + +sub consensus_string { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 consensus_iupac + + Title : consensus_iupac + Usage : $str = $ali->consensus_iupac() + Function : + + Makes a consensus using IUPAC ambiguity codes from DNA + and RNA. The output is in upper case except when gaps in + a column force output to be in lower case. + + Note that if your alignment sequences contain a lot of + IUPAC ambiguity codes you often have to manually set + alphabet. Bio::PrimarySeq::_guess_type thinks they + indicate a protein sequence. + + Returns : consensus string + Argument : none + Throws : on protein sequences + + +=cut + +sub consensus_iupac { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 is_flush + + Title : is_flush + Usage : if( $ali->is_flush() ) + : + : + Function : Tells you whether the alignment + : is flush, ie all of the same length + : + : + Returns : 1 or 0 + Argument : + +=cut + +sub is_flush { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 length + + Title : length() + Usage : $len = $ali->length() + Function : Returns the maximum length of the alignment. + To be sure the alignment is a block, use is_flush + Returns : + Argument : + +=cut + +sub length { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 maxdisplayname_length + + Title : maxdisplayname_length + Usage : $ali->maxdisplayname_length() + Function : + + Gets the maximum length of the displayname in the + alignment. Used in writing out various MSE formats. 
+ + Returns : integer + Argument : + +=cut + +# Interface stub under the documented name maxdisplayname_length. +sub maxdisplayname_length { + my ($self) = @_; + $self->throw_not_implemented(); +} + +# Backward-compatible alias: this stub was originally defined as +# maxname_length, which did not match the documented method name. +# It now forwards to the documented method. +sub maxname_length { + my ($self) = @_; + return $self->maxdisplayname_length(); +} + +=head2 no_residues + + Title : no_residues + Usage : $no = $ali->no_residues + Function : number of residues in total in the alignment + Returns : integer + Argument : + +=cut + +sub no_residues { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 no_sequences + + Title : no_sequences + Usage : $depth = $ali->no_sequences + Function : number of sequences in the sequence alignment + Returns : integer + Argument : None + +=cut + +sub no_sequences { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 percentage_identity + + Title : percentage_identity + Usage : $id = $align->percentage_identity + Function: The function calculates the percentage identity of the alignment + Returns : The percentage identity of the alignment (as defined by the + implementation) + Argument: None + +=cut + +sub percentage_identity{ + my ($self) = @_; + # Method name corrected from the misspelled throw_not_implemeneted so + # this stub calls the same method as every other stub in the interface. + $self->throw_not_implemented(); +} + +=head2 overall_percentage_identity + + Title : overall_percentage_identity + Usage : $id = $align->overall_percentage_identity + Function: The function calculates the percentage identity of + the conserved columns + Returns : The percentage identity of the conserved columns + Args : None + +=cut + +sub overall_percentage_identity{ + my ($self) = @_; + $self->throw_not_implemented(); +} + + +=head2 average_percentage_identity + + Title : average_percentage_identity + Usage : $id = $align->average_percentage_identity + Function: The function uses a fast method to calculate the average + percentage identity of the alignment + Returns : The average percentage identity of the alignment + Args : None + +=cut + +sub average_percentage_identity{ + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head1 Alignment positions + +Methods to map a sequence position into an alignment column and back. +column_from_residue_number() does the former. 
The latter is really a +property of the sequence object and can be done using +L<Bio::LocatableSeq::location_from_column>: + + # select somehow a sequence from the alignment, e.g. + my $seq = $aln->get_seq_by_pos(1); + #$loc is undef or Bio::LocationI object + my $loc = $seq->location_from_column(5); + + +=head2 column_from_residue_number + + Title : column_from_residue_number + Usage : $col = $ali->column_from_residue_number( $seqname, $resnumber) + Function: + + This function gives the position in the alignment + (i.e. column number) of the given residue number in the + sequence with the given name. For example, for the + alignment + + Seq1/91-97 AC..DEF.GH + Seq2/24-30 ACGG.RTY.. + Seq3/43-51 AC.DDEFGHI + + column_from_residue_number( "Seq1", 94 ) returns 6. + column_from_residue_number( "Seq2", 25 ) returns 2. + column_from_residue_number( "Seq3", 50 ) returns 9. + + An exception is thrown if the residue number would lie + outside the length of the alignment + (e.g. column_from_residue_number( "Seq2", 22 )) + + Note: If the parent sequence is represented by more than + one alignment sequence and the residue number is present in + them, this method finds only the first one. + + Returns : A column number for the position in the alignment of the + given residue in the given sequence (1 = first column) + Args : A sequence id/name (not a name/start-end) + A residue number in the whole sequence (not just that + segment of it in the alignment) + +=cut + +sub column_from_residue_number { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head1 Sequence names + +Methods to manipulate the display name. The default name based on the +sequence id and subsequence positions can be overridden in various +ways. 
+ +=head2 displayname + + Title : displayname + Usage : $myalign->displayname("Ig", "IgA") + Function : Gets/sets the display name of a sequence in the alignment + : + Returns : A display name string + Argument : name of the sequence + displayname of the sequence (optional) + +=cut + +sub displayname { + # Interface stub: unconditionally calls throw_not_implemented(). + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 set_displayname_count + + Title : set_displayname_count + Usage : $ali->set_displayname_count + Function : + + Sets the names to be name_# where # is the number of + times this name has been used. + + Returns : None + Argument : None + +=cut + +sub set_displayname_count { + # Interface stub: unconditionally calls throw_not_implemented(). + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 set_displayname_flat + + Title : set_displayname_flat + Usage : $ali->set_displayname_flat() + Function : Makes all the sequences be displayed as just their name, + not name/start-end + Returns : 1 + Argument : None + +=cut + +sub set_displayname_flat { + # Interface stub: unconditionally calls throw_not_implemented(). + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 set_displayname_normal + + Title : set_displayname_normal + Usage : $ali->set_displayname_normal() + Function : Makes all the sequences be displayed as name/start-end + Returns : None + Argument : None + +=cut + +sub set_displayname_normal { + # Interface stub: unconditionally calls throw_not_implemented(). + my ($self) = @_; + $self->throw_not_implemented(); +} + +# A module file must end with a true value so that require/use succeeds. +1;