Mercurial > repos > mahtabm > ensemb_rep_gvl
diff variant_effect_predictor/Bio/Align/Utilities.pm @ 0:2bc9b66ada89 draft default tip
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 06:29:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/Align/Utilities.pm Thu Apr 11 06:29:17 2013 -0400 @@ -0,0 +1,161 @@ +# $Id: Utilities.pm,v 1.8 2002/11/11 18:39:19 jason Exp $ +# +# BioPerl module for Bio::Align::Utilities +# +# Cared for by Jason Stajich <jason@bioperl.org> +# +# Copyright Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Align::Utilities - A collection of utilities regarding converting and manipulating alignment objects + +=head1 SYNOPSIS + +use Bio::Align::Utilities qw(aa_to_dna_aln); + +my $dna_aln = aa_to_dna_aln($aaaln,\%dnaseqs); + + +=head1 DESCRIPTION + +This module contains utility methods for manipulating sequence +alignments ( L<Bio::Align::AlignI>) objects. + +The B<aa_to_dna_aln> utility is essentially the same as the B<mrtrans> +program by Bill Pearson available at +ftp://ftp.virginia.edu/pub/fasta/other/mrtrans.shar. Of course this +is a pure-perl implementation, but just to mention that if anything +seems odd you can check the alignments generated against Bill's +program. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/MailList.shtml - About the mailing lists + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +email or the web: + + bioperl-bugs@bioperl.org + http://bugzilla.bioperl.org/ + +=head1 AUTHOR - Jason Stajich + +Email jason@bioperl.org + +=head1 CONTRIBUTORS + +Additional contributors names and emails here + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + +#' keep my emacs happy +# Let the code begin... + + +package Bio::Align::Utilities; +use vars qw(@ISA @EXPORT @EXPORT_OK); +use strict; +use Carp; +require Exporter; + +@ISA = qw(Exporter); + +@EXPORT = qw(); +@EXPORT_OK = qw(aa_to_dna_aln); + +use constant CODONSIZE => 3; + +=head2 aa_to_dna_aln + + Title : aa_to_dna_aln + Usage : my $dnaaln = aa_to_dna_aln($aa_aln, \%seqs); + Function: Will convert an AA alignment to DNA space given the + corresponding DNA sequences. Note that this method expects + the DNA sequences to be in frame +1 (GFF frame 0) as it will + start to project into coordinates starting at the first base of + the DNA sequence, if this alignment represents a different + frame for the cDNA you will need to edit the DNA sequences + to remove the 1st or 2nd bases (and revcom if things should be). + Returns : Bio::Align::AlignI object + Args : 2 arguments, the alignment and a hashref. + Alignment is a Bio::Align::AlignI of amino acid sequences. + The hash reference should have keys which are + the display_ids for the aa + sequences in the alignment and the values are a + Bio::PrimarySeqI object for the corresponding + spliced cDNA sequence. + +See also: L<Bio::Align::AlignI>, L<Bio::SimpleAlign>, L<Bio::PrimarySeq> + +=cut + +sub aa_to_dna_aln { + my ($aln,$dnaseqs) = @_; + unless( defined $aln && + ref($aln) && + $aln->isa('Bio::Align::AlignI') ) { + croak('Must provide a valid Bio::Align::AlignI object as the first argument to aa_to_dna_aln, see the documentation for proper usage and the method signature'); + } + my $alnlen = $aln->length; + #print "HSP length is $alnlen\n"; + my $dnaalign = new Bio::SimpleAlign; + foreach my $seq ( $aln->each_seq ) { + my $newseq; + my $dnaseq = $dnaseqs->{$seq->display_id} || croak("cannot find ". + $seq->display_id); + foreach my $pos ( 1..$alnlen ) { + my $loc = $seq->location_from_column($pos); + my $dna = ''; + if( !defined $loc || $loc->location_type ne 'EXACT' ) { + $dna = '---'; + } else { + # To readjust to codon boundaries + # end needs to be +1 so we can just multiply by CODONSIZE + # to get this + + my ($start,$end) = ((($loc->start - 1)* CODONSIZE) +1, + ($loc->end)* CODONSIZE); + + if( $start <=0 || $end > $dnaseq->length() ) { + print STDERR "start is ", $loc->start, " end is ", $loc->end, " while dnaseq length is ", $dnaseq->length(), " and start/end projected are $start,$end \n"; + warn("codons don't seem to be matching up for $start,$end"); + $dna = '---'; + } else { + $dna = $dnaseq->subseq($start,$end); + } + } + $newseq .= $dna; + } + # funky looking math is to readjust to codon boundaries and deal + # with fact that sequence start with 1 + my $newdna = new Bio::LocatableSeq(-display_id => $seq->id(), + -start => (($seq->start - 1) * + CODONSIZE) + 1, + -end => ($seq->end * CODONSIZE), + -strand => $seq->strand, + -seq => $newseq); + $dnaalign->add_seq($newdna); + } + return $dnaalign; +} +1;