annotate variant_effect_predictor/Bio/EnsEMBL/TranscriptMapper.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 TranscriptMapper - A utility class used to perform coordinate conversions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 between a number of coordinate systems relating to transcripts
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 my $trmapper = Bio::EnsEMBL::TranscriptMapper->new($transcript);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 @coords = $trmapper->cdna2genomic( 123, 554 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32 @coords = $trmapper->genomic2cdna( 141, 500, -1 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 @coords = $trmapper->genomic2cds( 141, 500, -1 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 @coords = $trmapper->pep2genomic( 10, 60 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38 @coords = $trmapper->genomic2pep( 123, 400, 1 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 This is a utility class which can be used to perform coordinate conversions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 between a number of coordinate systems relating to transcripts.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 =head1 METHODS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 package Bio::EnsEMBL::TranscriptMapper;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52 use warnings;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54 use Bio::EnsEMBL::Utils::Exception qw(throw);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 use Bio::EnsEMBL::Mapper;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 use Bio::EnsEMBL::Mapper::Gap;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58 use Bio::EnsEMBL::Mapper::Coordinate;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 =head2 new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 Arg [1] : Bio::EnsEMBL::Transcript $transcript
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65 The transcript for which a TranscriptMapper should be created.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 Example : $trans_mapper = Bio::EnsEMBL::TranscriptMapper->new($transcript)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67 Description: Creates a TranscriptMapper object which can be used to perform
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68 various coordinate transformations relating to transcripts.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 Note that the TranscriptMapper uses the transcript state at the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70 time of creation to perform the conversions, and that a new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 TranscriptMapper must be created if the Transcript is altered.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72 'Genomic' coordinates are coordinates which are relative to the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 slice that the Transcript is on.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 Returntype : Bio::EnsEMBL::TranscriptMapper
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 Exceptions : throws if a transcript is not an argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76 Caller : Transcript::get_TranscriptMapper
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81 sub new {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82 my $caller = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 my $transcript = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 my $class = ref($caller) || $caller;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88 throw("Transcript argument is required.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92 my $exons = $transcript->get_all_Exons();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93 my $start_phase;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 if(@$exons) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 $start_phase = $exons->[0]->phase;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 $start_phase = -1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100 # Create a cdna <-> genomic mapper and load it with exon coords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 my $mapper = _load_mapper($transcript,$start_phase);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 my $self = bless({'exon_coord_mapper' => $mapper,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104 'start_phase' => $start_phase,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105 'cdna_coding_start' => $transcript->cdna_coding_start(),
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 'cdna_coding_end' => $transcript->cdna_coding_end()},
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107 $class);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
111 =head2 _load_mapper
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
112
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113 Arg [1] : Bio::EnsEMBL::Transcript $transcript
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114 The transcript for which a mapper should be created.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115 Example : my $mapper = _load_mapper($transcript);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116 Description: loads the mapper
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117 Returntype : Bio::EnsEMBL::Mapper
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 Caller : Internal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124 sub _load_mapper {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125 my $transcript = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126 my $start_phase = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128 my $mapper = Bio::EnsEMBL::Mapper->new( 'cdna', 'genomic');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
129
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
130 my $edits_on = $transcript->edits_enabled();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131 my @edits;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133 if($edits_on) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134 @edits = @{$transcript->get_all_SeqEdits()};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135 @edits = sort {$a->start() <=> $b->start()} @edits;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138 my $edit_shift = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140 my $cdna_start = undef;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142 my $cdna_end = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145 foreach my $ex (@{$transcript->get_all_Exons}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 my $gen_start = $ex->start();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147 my $gen_end = $ex->end();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 $cdna_start = $cdna_end + 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150 $cdna_end = $cdna_start + $ex->length() - 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152 my $strand = $ex->strand();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154 # add deletions and insertions into pairs when SeqEdits turned on
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155 # ignore mismatches (i.e. treat as matches)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156 if($edits_on) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157 while(@edits && $edits[0]->start() + $edit_shift <= $cdna_end) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159 my $edit = shift(@edits);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 my $len_diff = $edit->length_diff();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162 if($len_diff) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163 # break pair into two parts, finish first pair just before edit
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165 my $prev_cdna_end = $edit->start() + $edit_shift - 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166 my $prev_cdna_start = $cdna_start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167 my $prev_len = $prev_cdna_end - $prev_cdna_start + 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
168
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
169 my $prev_gen_end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
170 my $prev_gen_start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
171 if($strand == 1) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172 $prev_gen_start = $gen_start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
173 $prev_gen_end = $gen_start + $prev_len - 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
174 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
175 $prev_gen_start = $gen_end - $prev_len + 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
176 $prev_gen_end = $gen_end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
177 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
178
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
179 if($prev_len > 0) { # only create map pair if not boundary case
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
180 $mapper->add_map_coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
181 ('cdna', $prev_cdna_start, $prev_cdna_end, $strand,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182 'genome', $prev_gen_start,$prev_gen_end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185 $cdna_start = $prev_cdna_end + 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187 if($strand == 1) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 $gen_start = $prev_gen_end + 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190 $gen_end = $prev_gen_start - 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193 $cdna_end += $len_diff;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
195 if($len_diff > 0) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
196 # insert in cdna, shift cdna coords along
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
197 $cdna_start += $len_diff;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
198 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
199 # delete in cdna (insert in genomic), shift genomic coords along
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
200
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
201 if($strand == 1) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
202 $gen_start -= $len_diff;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
203 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
204 $gen_end += $len_diff;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
205 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
206 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
207
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
208 $edit_shift += $len_diff;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
209 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
210 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
211 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
212
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
213 my $pair_len = $cdna_end - $cdna_start + 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
214
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
215 if($pair_len > 0) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
216 $mapper->add_map_coordinates('cdna', $cdna_start, $cdna_end, $strand,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
217 'genome', $gen_start, $gen_end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
218 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
219 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
220
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
221 return $mapper;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
222 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
223
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
224
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
225 =head2 cdna2genomic
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
226
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
227 Arg [1] : $start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
228 The start position in cdna coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
229 Arg [2] : $end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
230 The end position in cdna coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
231 Example : @cdna_coords = $transcript_mapper->cdna2genomic($start, $end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
232 Description: Converts cdna coordinates to genomic coordinates. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233 return value is a list of coordinates and gaps.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234 Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235 Bio::EnsEMBL::Mapper::Gap objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236 Exceptions : throws if no start or end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
237 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
238 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
239
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
240 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
242
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
243 sub cdna2genomic {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
244 my ($self,$start,$end) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246 if( !defined $end ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
247 throw("Must call with start/end");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
248 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
249
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250 my $mapper = $self->{'exon_coord_mapper'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252 return $mapper->map_coordinates( 'cdna', $start, $end, 1, "cdna" );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
253
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
254 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
255
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
256
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
257 =head2 genomic2cdna
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
258
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
259 Arg [1] : $start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
260 The start position in genomic coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
261 Arg [2] : $end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
262 The end position in genomic coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
263 Arg [3] : $strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
264 The strand of the genomic coordinates (required; an exception is thrown if it is not defined)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
265 Example : @coords = $trans_mapper->genomic2cdna($start, $end, $strnd);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
266 Description: Converts genomic coordinates to cdna coordinates. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
267 return value is a list of coordinates and gaps. Gaps
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
268 represent intronic or upstream/downstream regions which do
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
269 not comprise this transcript's cdna. Coordinate objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
270 represent genomic regions which map to exons (utrs included).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
271 Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
272 Bio::EnsEMBL::Mapper::Gap objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
273 Exceptions : throws if start, end or strand not defined
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
274 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
275 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
276
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
277 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
278
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
279 sub genomic2cdna {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
280 my ($self, $start, $end, $strand) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
281
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
282 unless(defined $start && defined $end && defined $strand) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
283 throw("start, end and strand arguments are required\n");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
284 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
285
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
286 my $mapper = $self->{'exon_coord_mapper'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
287
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
288 return $mapper->map_coordinates("genome", $start, $end, $strand,"genomic");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
289
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
290 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293 =head2 cds2genomic
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295 Arg [1] : int $start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 start position in cds coords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297 Arg [2] : int $end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298 end position in cds coords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299 Example : @genomic_coords = $transcript_mapper->cds2genomic(69, 306);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
300 Description: Converts cds coordinates into genomic coordinates. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
301 coordinates returned are relative to the same slice that the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
302 transcript used to construct this TranscriptMapper was on.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
303 Returntype : list of Bio::EnsEMBL::Mapper::Gap and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
304 Bio::EnsEMBL::Mapper::Coordinate objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
305 Exceptions : throws if start or end is not defined
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
306 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
307 Status : at risk
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
308
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
309 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
310
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
311 sub cds2genomic {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
312 my ( $self, $start, $end ) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
313
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
314 if ( !( defined($start) && defined($end) ) ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
315 throw("Must call with start and end");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
316 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
317
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
318 # Shift start and end from CDS coordinates into cDNA coordinates.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
319 $start = $start +( $self->{'cdna_coding_start'} - 1 ) ;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
320 $end = $end + ( $self->{'cdna_coding_start'} - 1 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
321
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
322 return $self->cdna2genomic( $start, $end );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
323 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
324
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
325 =head2 pep2genomic
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
326
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
327 Arg [1] : int $start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328 start position in peptide coords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329 Arg [2] : int $end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330 end position in peptide coords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 Example : @genomic_coords = $transcript_mapper->pep2genomic(23, 102);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332 Description: Converts peptide coordinates into genomic coordinates. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333 coordinates returned are relative to the same slice that the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334 transcript used to construct this TranscriptMapper was on.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 Returntype : list of Bio::EnsEMBL::Mapper::Gap and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336 Bio::EnsEMBL::Mapper::Coordinate objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337 Exceptions : throws if start or end is not defined
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 sub pep2genomic {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344 my ( $self, $start, $end ) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346 if ( !( defined($start) && defined($end) ) ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347 throw("Must call with start and end");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
349
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
350 # Take possible N-padding at beginning of CDS into account.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351 my $start_phase = $self->{'start_phase'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352 my $shift = ( $start_phase > 0 ) ? $start_phase : 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354 # Convert start and end from peptide coordinates into cDNA coordinates.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 $start = 3*$start - 2 + ( $self->{'cdna_coding_start'} - 1 ) - $shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356 $end = 3*$end + ( $self->{'cdna_coding_start'} - 1 ) - $shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
357
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358 return $self->cdna2genomic( $start, $end );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
361
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
362 =head2 genomic2cds
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
363
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
364 Arg [1] : int $start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
365 The genomic start position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
366 Arg [2] : int $end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
367 The genomic end position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
368 Arg [3] : int $strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
369 The genomic strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
370 Example : @cds_coords = $trans_mapper->genomic2cds($start, $end, $strand);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
371 Description: Converts genomic coordinates into CDS coordinates of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
372 transcript that was used to create this transcript mapper.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
373 Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
374 Bio::EnsEMBL::Mapper::Gap objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
375 Exceptions : throw if start, end or strand not defined
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
376 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
377 Status : Stable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
378
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
379 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
380
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Maps a genomic range onto CDS-relative coordinates.
#
# The range is first passed to genomic2cdna(); every non-gap result is then
# re-based on the mapper's 'cdna_coding_start' and clipped to
# 'cdna_coding_end'.  Parts falling outside that window are reported as
# Bio::EnsEMBL::Mapper::Gap objects.
#
# Arguments : $start, $end, $strand - all required; $start may be at most
#             $end + 1.
# Returns   : list of coordinate and Bio::EnsEMBL::Mapper::Gap objects.  The
#             coordinate objects handed back by genomic2cdna() are modified in
#             place (their start/end are overwritten).
# Throws    : if an argument is undefined, or if $start > $end + 1.
sub genomic2cds {
  my ($self, $start, $end, $strand) = @_;

  # All three positional arguments are mandatory.
  unless (defined($start) && defined($end) && defined($strand)) {
    throw("start, end and strand arguments are required");
  }

  # A start of end + 1 is accepted; anything larger is rejected.
  throw("start arg must be less than or equal to end arg + 1")
    if $start > $end + 1;

  my ($coding_from, $coding_to) =
    @{$self}{ 'cdna_coding_start', 'cdna_coding_end' };

  # No coding start stored on this mapper (the pseudogene case): the whole
  # requested region is returned as a single gap.
  return Bio::EnsEMBL::Mapper::Gap->new($start, $end)
    unless defined($coding_from);

  my @mapped;

  for my $piece ($self->genomic2cdna($start, $end, $strand)) {

    # Gaps produced by genomic2cdna() are passed through untouched.
    if ($piece->isa('Bio::EnsEMBL::Mapper::Gap')) {
      push @mapped, $piece;
      next;
    }

    # NOTE: from here on the positions are the ones reported by
    # genomic2cdna(), not the genomic arguments; every gap created below is
    # built from these values.
    my $piece_from = $piece->start;
    my $piece_to   = $piece->end;

    # Opposite-strand pieces, and pieces lying completely before or after
    # the coding window, do not map to the CDS at all.
    if (   $piece->strand == -1
        || $piece_to < $coding_from
        || $piece_from > $coding_to)
    {
      push @mapped, Bio::EnsEMBL::Mapper::Gap->new($piece_from, $piece_to);
      next;
    }

    # The piece overlaps the coding window at least partially.

    my $cds_from;
    if ($piece_from < $coding_from) {
      # Leading part lies in the 5prime UTR: emit it as a gap and let the
      # coordinate begin at the first CDS position.
      push @mapped,
        Bio::EnsEMBL::Mapper::Gap->new($piece_from, $coding_from - 1);
      $cds_from = 1;
    }
    else {
      $cds_from = $piece_from - $coding_from + 1;
    }

    my $cds_to;
    my $utr3_gap;
    if ($piece_to > $coding_to) {
      # Trailing part lies in the 3prime UTR: remember it as a gap (it must
      # follow the coordinate in the output) and end at the last CDS position.
      $utr3_gap = Bio::EnsEMBL::Mapper::Gap->new($coding_to + 1, $piece_to);
      $cds_to   = $coding_to - $coding_from + 1;
    }
    else {
      $cds_to = $piece_to - $coding_from + 1;
    }

    # Overwrite the coordinate with its CDS-relative extent.
    $piece->start($cds_from);
    $piece->end($cds_to);
    push @mapped, $piece;

    push @mapped, $utr3_gap if $utr3_gap;
  }

  return @mapped;

}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
454
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
455
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
=head2 genomic2pep

  Arg [1]    : $start
               The start position in genomic coordinates
  Arg [2]    : $end
               The end position in genomic coordinates
  Arg [3]    : $strand
               The strand of the genomic coordinates
  Example    : @pep_coords = $trans_mapper->genomic2pep($start, $end, $strand);
  Description: Converts genomic coordinates to peptide coordinates. The
               region is first mapped with genomic2cds(); gaps are returned
               unchanged, and every other object has its start and end
               replaced (in place) by int((position + shift + 2) / 3), where
               shift is the stored start phase when that is greater than
               zero and 0 otherwise.
  Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
               Bio::EnsEMBL::Mapper::Gap objects
  Exceptions : throw if start, end or strand not defined; genomic2cds()
               additionally throws if start > end + 1
  Caller     : general
  Status     : Stable

=cut

sub genomic2pep {
  my ($self, $start, $end, $strand) = @_;

  if (!defined($start) || !defined($end) || !defined($strand)) {
    throw("start, end and strand arguments are required");
  }

  my @cds_segments = $self->genomic2cds($start, $end, $strand);

  my @out;

  # A positive start phase shifts every CDS position before the division
  # into codons (possible N padding at the beginning of the CDS).
  my $phase  = $self->{'start_phase'};
  my $offset = 0;
  $offset = $phase if $phase > 0;

  for my $segment (@cds_segments) {

    # Gaps are forwarded as they are; everything else is CDS-relative and
    # gets converted to peptide positions.
    unless ($segment->isa('Bio::EnsEMBL::Mapper::Gap')) {
      my ($first_residue, $last_residue) =
        map { int(($_ + $offset + 2) / 3) } ($segment->start, $segment->end);

      $segment->start($first_residue);
      $segment->end($last_residue);
    }

    push @out, $segment;
  }

  return @out;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
510
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
511
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
512 1;