comparison variant_effect_predictor/Bio/Align/Utilities.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: Utilities.pm,v 1.8 2002/11/11 18:39:19 jason Exp $
2 #
3 # BioPerl module for Bio::Align::Utilities
4 #
5 # Cared for by Jason Stajich <jason@bioperl.org>
6 #
7 # Copyright Jason Stajich
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::Align::Utilities - A collection of utilities regarding converting and manipulating alignment objects
16
17 =head1 SYNOPSIS
18
19 use Bio::Align::Utilities qw(aa_to_dna_aln);
20
21 my $dna_aln = aa_to_dna_aln($aaaln,\%dnaseqs);
22
23
24 =head1 DESCRIPTION
25
26 This module contains utility methods for manipulating sequence
27 alignments ( L<Bio::Align::AlignI>) objects.
28
29 The B<aa_to_dna_aln> utility is essentially the same as the B<mrtrans>
30 program by Bill Pearson available at
31 ftp://ftp.virginia.edu/pub/fasta/other/mrtrans.shar. Of course this
32 is a pure-perl implementation, but just to mention that if anything
33 seems odd you can check the alignments generated against Bill's
34 program.
35
36 =head1 FEEDBACK
37
38 =head2 Mailing Lists
39
40 User feedback is an integral part of the evolution of this and other
41 Bioperl modules. Send your comments and suggestions preferably to
42 the Bioperl mailing list. Your participation is much appreciated.
43
44 bioperl-l@bioperl.org - General discussion
45 http://bioperl.org/MailList.shtml - About the mailing lists
46
47 =head2 Reporting Bugs
48
49 Report bugs to the Bioperl bug tracking system to help us keep track
50 of the bugs and their resolution. Bug reports can be submitted via
51 email or the web:
52
53 bioperl-bugs@bioperl.org
54 http://bugzilla.bioperl.org/
55
56 =head1 AUTHOR - Jason Stajich
57
58 Email jason@bioperl.org
59
60 =head1 CONTRIBUTORS
61
62 Additional contributors names and emails here
63
64 =head1 APPENDIX
65
66 The rest of the documentation details each of the object methods.
67 Internal methods are usually preceded with a _
68
69 =cut
70
71 #' keep my emacs happy
72 # Let the code begin...
73
74
75 package Bio::Align::Utilities;
76 use vars qw(@ISA @EXPORT @EXPORT_OK);
77 use strict;
78 use Carp;
79 require Exporter;
80
81 @ISA = qw(Exporter);
82
83 @EXPORT = qw();
84 @EXPORT_OK = qw(aa_to_dna_aln);
85
86 use constant CODONSIZE => 3;
87
88 =head2 aa_to_dna_aln
89
90 Title : aa_to_dna_aln
91 Usage : my $dnaaln = aa_to_dna_aln($aa_aln, \%seqs);
92 Function: Will convert an AA alignment to DNA space given the
93 corresponding DNA sequences. Note that this method expects
94 the DNA sequences to be in frame +1 (GFF frame 0) as it will
95 start to project into coordinates starting at the first base of
96 the DNA sequence, if this alignment represents a different
97 frame for the cDNA you will need to edit the DNA sequences
98 to remove the 1st or 2nd bases (and revcom if things should be).
99 Returns : Bio::Align::AlignI object
100 Args : 2 arguments, the alignment and a hashref.
101 Alignment is a Bio::Align::AlignI of amino acid sequences.
102 The hash reference should have keys which are
103 the display_ids for the aa
104 sequences in the alignment and the values are a
105 Bio::PrimarySeqI object for the corresponding
106 spliced cDNA sequence.
107
108 See also: L<Bio::Align::AlignI>, L<Bio::SimpleAlign>, L<Bio::PrimarySeq>
109
110 =cut
111
112 sub aa_to_dna_aln {
113 my ($aln,$dnaseqs) = @_;
114 unless( defined $aln &&
115 ref($aln) &&
116 $aln->isa('Bio::Align::AlignI') ) {
117 croak('Must provide a valid Bio::Align::AlignI object as the first argument to aa_to_dna_aln, see the documentation for proper usage and the method signature');
118 }
119 my $alnlen = $aln->length;
120 #print "HSP length is $alnlen\n";
121 my $dnaalign = new Bio::SimpleAlign;
122 foreach my $seq ( $aln->each_seq ) {
123 my $newseq;
124 my $dnaseq = $dnaseqs->{$seq->display_id} || croak("cannot find ".
125 $seq->display_id);
126 foreach my $pos ( 1..$alnlen ) {
127 my $loc = $seq->location_from_column($pos);
128 my $dna = '';
129 if( !defined $loc || $loc->location_type ne 'EXACT' ) {
130 $dna = '---';
131 } else {
132 # To readjust to codon boundaries
133 # end needs to be +1 so we can just multiply by CODONSIZE
134 # to get this
135
136 my ($start,$end) = ((($loc->start - 1)* CODONSIZE) +1,
137 ($loc->end)* CODONSIZE);
138
139 if( $start <=0 || $end > $dnaseq->length() ) {
140 print STDERR "start is ", $loc->start, " end is ", $loc->end, " while dnaseq length is ", $dnaseq->length(), " and start/end projected are $start,$end \n";
141 warn("codons don't seem to be matching up for $start,$end");
142 $dna = '---';
143 } else {
144 $dna = $dnaseq->subseq($start,$end);
145 }
146 }
147 $newseq .= $dna;
148 }
149 # funky looking math is to readjust to codon boundaries and deal
150 # with fact that sequence start with 1
151 my $newdna = new Bio::LocatableSeq(-display_id => $seq->id(),
152 -start => (($seq->start - 1) *
153 CODONSIZE) + 1,
154 -end => ($seq->end * CODONSIZE),
155 -strand => $seq->strand,
156 -seq => $newseq);
157 $dnaalign->add_seq($newdna);
158 }
159 return $dnaalign;
160 }
161 1;