annotate variant_effect_predictor/Bio/EnsEMBL/TranscriptMapper.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 TranscriptMapper - A utility class used to perform coordinate conversions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 between a number of coordinate systems relating to transcripts
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 my $trmapper = Bio::EnsEMBL::TranscriptMapper->new($transcript);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 @coords = $trmapper->cdna2genomic( 123, 554 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32 @coords = $trmapper->genomic2cdna( 141, 500, -1 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 @coords = $trmapper->genomic2cds( 141, 500, -1 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 @coords = $trmapper->pep2genomic( 10, 60 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38 @coords = $trmapper->genomic2pep( 123, 400, 1 );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 This is a utility class which can be used to perform coordinate conversions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 between a number of coordinate systems relating to transcripts.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 =head1 METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 package Bio::EnsEMBL::TranscriptMapper;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52 use warnings;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 use Bio::EnsEMBL::Utils::Exception qw(throw);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 use Bio::EnsEMBL::Mapper;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 use Bio::EnsEMBL::Mapper::Gap;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 use Bio::EnsEMBL::Mapper::Coordinate;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 new

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
               The transcript whose coordinates this mapper will convert.
  Example    : my $trans_mapper =
                 Bio::EnsEMBL::TranscriptMapper->new($transcript);
  Description: Builds a TranscriptMapper for the given transcript. The
               constructor stores a cdna <-> genomic mapper loaded with the
               exon coordinates, the phase of the first exon (-1 when the
               transcript has no exons) and the cDNA coding start and end.
               Everything is captured at construction time, so a new
               TranscriptMapper must be created if the Transcript is
               altered afterwards. 'Genomic' coordinates are coordinates
               relative to the slice that the Transcript is on.
  Returntype : Bio::EnsEMBL::TranscriptMapper
  Exceptions : throws if the argument is not a reference to a
               Bio::EnsEMBL::Transcript
  Caller     : Transcript::get_TranscriptMapper
  Status     : Stable

=cut

sub new {
  my ( $caller, $transcript ) = @_;

  # Reject anything that is not a Transcript object before doing any work.
  unless ( ref($transcript) && $transcript->isa('Bio::EnsEMBL::Transcript') ) {
    throw("Transcript argument is required.");
  }

  # Works both as Class->new(...) and as $object->new(...).
  my $class = ref($caller) || $caller;

  # Phase of the first exon, or -1 when the transcript has no exons.
  my $exons       = $transcript->get_all_Exons();
  my $start_phase = @$exons ? $exons->[0]->phase : -1;

  # Create a cdna <-> genomic mapper and load it with exon coords
  my $mapper = _load_mapper( $transcript, $start_phase );

  my $self = bless(
    {
      'exon_coord_mapper' => $mapper,
      'start_phase'       => $start_phase,
      'cdna_coding_start' => $transcript->cdna_coding_start(),
      'cdna_coding_end'   => $transcript->cdna_coding_end(),
    },
    $class
  );

  return $self;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 _load_mapper

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
               The transcript for which a mapper should be created.
  Arg [2]    : (optional) $start_phase
               Accepted because new() passes it, but it is not used when
               building the mapper.
  Example    : my $mapper = _load_mapper($transcript);
  Description: Creates a Bio::EnsEMBL::Mapper for the 'cdna' and 'genomic'
               coordinate systems and adds map pairs between the sequence
               ids 'cdna' and 'genome', walking the exons in the order
               returned by get_all_Exons(). Without SeqEdits each exon
               contributes one pair. When edits_enabled() is true, every
               SeqEdit with a non-zero length_diff() splits the pair at the
               edit position: an insertion leaves a hole in the cdna
               coordinates, a deletion leaves a hole in the genomic
               coordinates. Edits whose length_diff() is zero (mismatches)
               are ignored, i.e. treated as matches.
  Returntype : Bio::EnsEMBL::Mapper
  Exceptions : none
  Caller     : Internal
  Status     : Stable

=cut

sub _load_mapper {
  # Only the transcript is needed; any further argument is ignored.
  my ($transcript) = @_;

  my $mapper = Bio::EnsEMBL::Mapper->new( 'cdna', 'genomic' );

  my $edits_on = $transcript->edits_enabled();
  my @edits;

  if ($edits_on) {
    # Process edits from lowest to highest start position.
    @edits = sort { $a->start() <=> $b->start() }
      @{ $transcript->get_all_SeqEdits() };
  }

  # Sum of the length differences of all edits applied so far. It is added
  # to an edit's start() before that is compared with the running cdna
  # coordinates.
  my $edit_shift = 0;

  my $cdna_start;
  my $cdna_end = 0;

  foreach my $ex ( @{ $transcript->get_all_Exons } ) {
    my $gen_start = $ex->start();
    my $gen_end   = $ex->end();

    # This exon continues in cdna coordinates where the previous one ended.
    $cdna_start = $cdna_end + 1;
    $cdna_end   = $cdna_start + $ex->length() - 1;

    my $strand = $ex->strand();

    # add deletions and insertions into pairs when SeqEdits turned on
    # ignore mismatches (i.e. treat as matches)
    if ($edits_on) {
      while ( @edits && $edits[0]->start() + $edit_shift <= $cdna_end ) {

        my $edit     = shift(@edits);
        my $len_diff = $edit->length_diff();

        if ($len_diff) {
          # break pair into two parts, finish first pair just before edit
          my $prev_cdna_end   = $edit->start() + $edit_shift - 1;
          my $prev_cdna_start = $cdna_start;
          my $prev_len        = $prev_cdna_end - $prev_cdna_start + 1;

          # The first part is taken from the start of the exon on strand 1
          # and from the end of the exon otherwise.
          my $prev_gen_end;
          my $prev_gen_start;
          if ( $strand == 1 ) {
            $prev_gen_start = $gen_start;
            $prev_gen_end   = $gen_start + $prev_len - 1;
          }
          else {
            $prev_gen_start = $gen_end - $prev_len + 1;
            $prev_gen_end   = $gen_end;
          }

          if ( $prev_len > 0 ) {  # only create map pair if not boundary case
            $mapper->add_map_coordinates(
              'cdna',   $prev_cdna_start, $prev_cdna_end, $strand,
              'genome', $prev_gen_start,  $prev_gen_end
            );
          }

          # The remainder of the exon starts right after the first part.
          $cdna_start = $prev_cdna_end + 1;

          if ( $strand == 1 ) {
            $gen_start = $prev_gen_end + 1;
          }
          else {
            $gen_end = $prev_gen_start - 1;
          }

          # The edit lengthens or shortens the cdna covered by this exon.
          $cdna_end += $len_diff;

          if ( $len_diff > 0 ) {
            # insert in cdna, shift cdna coords along
            $cdna_start += $len_diff;
          }
          else {
            # delete in cdna (insert in genomic), shift genomic coords along
            # ($len_diff is negative here, so strand 1 moves the genomic
            # start up and the other strand moves the genomic end down)
            if ( $strand == 1 ) {
              $gen_start -= $len_diff;
            }
            else {
              $gen_end += $len_diff;
            }
          }

          $edit_shift += $len_diff;
        }
      }
    }

    # Whatever is left of the exon after any edits forms the final pair.
    my $pair_len = $cdna_end - $cdna_start + 1;

    if ( $pair_len > 0 ) {
      $mapper->add_map_coordinates(
        'cdna',   $cdna_start, $cdna_end, $strand,
        'genome', $gen_start,  $gen_end
      );
    }
  }

  return $mapper;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 cdna2genomic

  Arg [1]    : $start
               The start position in cdna coordinates
  Arg [2]    : $end
               The end position in cdna coordinates
  Example    : @genomic_coords = $transcript_mapper->cdna2genomic($start, $end);
  Description: Converts cdna coordinates to genomic coordinates by asking
               the exon coordinate mapper to map the range from the 'cdna'
               coordinate system, always on strand 1. The return value is
               a list of coordinates and gaps.
  Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
               Bio::EnsEMBL::Mapper::Gap objects
  Exceptions : throws if start or end is not defined
  Caller     : general
  Status     : Stable

=cut

sub cdna2genomic {
  my ( $self, $start, $end ) = @_;

  # Both bounds are mandatory. Checking only $end would let an undefined
  # $start through to the mapper.
  if ( !defined($start) || !defined($end) ) {
    throw("Must call with start/end");
  }

  my $mapper = $self->{'exon_coord_mapper'};

  return $mapper->map_coordinates( 'cdna', $start, $end, 1, "cdna" );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 genomic2cdna

  Arg [1]    : $start
               The start position in genomic coordinates
  Arg [2]    : $end
               The end position in genomic coordinates
  Arg [3]    : $strand
               The strand of the genomic coordinates (required)
  Example    : @coords = $trans_mapper->genomic2cdna($start, $end, $strand);
  Description: Converts genomic coordinates to cdna coordinates. The
               return value is a list of coordinates and gaps. Gaps
               represent intronic or upstream/downstream regions which do
               not comprise this transcript's cdna. Coordinate objects
               represent genomic regions which map to exons (utrs included).
  Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
               Bio::EnsEMBL::Mapper::Gap objects
  Exceptions : throws if start, end or strand not defined
  Caller     : general
  Status     : Stable

=cut

sub genomic2cdna {
  my $self = shift;
  my ( $start, $end, $strand ) = @_;

  # All three arguments are mandatory; there is no default strand.
  if ( !defined($start) || !defined($end) || !defined($strand) ) {
    throw("start, end and strand arguments are required\n");
  }

  # Map from the 'genomic' coordinate system of the exon mapper.
  return $self->{'exon_coord_mapper'}
    ->map_coordinates( "genome", $start, $end, $strand, "genomic" );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 cds2genomic

  Arg [1]    : int $start
               start position in cds coords
  Arg [2]    : int $end
               end position in cds coords
  Example    : @genomic_coords = $transcript_mapper->cds2genomic(69, 306);
  Description: Converts cds coordinates into genomic coordinates. Both
               positions are first moved into cdna coordinates by adding
               the cdna coding start minus one, and the result is handed
               to cdna2genomic(). The coordinates returned are relative to
               the same slice that the transcript used to construct this
               TranscriptMapper was on.
  Returntype : list of Bio::EnsEMBL::Mapper::Gap and
               Bio::EnsEMBL::Mapper::Coordinate objects
  Exceptions : throws if start or end is not defined
  Caller     : general
  Status     : at risk

=cut

sub cds2genomic {
  my ( $self, $start, $end ) = @_;

  unless ( defined($start) && defined($end) ) {
    throw("Must call with start and end");
  }

  # CDS position 1 is the cdna coding start, hence the offset of
  # (coding start - 1) on both ends.
  my $cdna_start = $start + ( $self->{'cdna_coding_start'} - 1 );
  my $cdna_end   = $end   + ( $self->{'cdna_coding_start'} - 1 );

  return $self->cdna2genomic( $cdna_start, $cdna_end );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
=head2 pep2genomic

  Arg [1]    : int $start
               start position in peptide coords
  Arg [2]    : int $end
               end position in peptide coords
  Example    : @genomic_coords = $transcript_mapper->pep2genomic(23, 102);
  Description: Converts peptide coordinates into genomic coordinates.
               Peptide position p is taken to span coding bases 3p-2 to
               3p; these are shifted into cdna coordinates using the cdna
               coding start, reduced by the start phase when that is
               positive, and handed to cdna2genomic(). The coordinates
               returned are relative to the same slice that the transcript
               used to construct this TranscriptMapper was on.
  Returntype : list of Bio::EnsEMBL::Mapper::Gap and
               Bio::EnsEMBL::Mapper::Coordinate objects
  Exceptions : throws if start or end is not defined
  Caller     : general
  Status     : Stable

=cut

sub pep2genomic {
  my ( $self, $start, $end ) = @_;

  unless ( defined($start) && defined($end) ) {
    throw("Must call with start and end");
  }

  # Take possible N-padding at beginning of CDS into account: only a
  # positive start phase shifts the coordinates.
  my $phase   = $self->{'start_phase'};
  my $padding = 0;
  if ( $phase > 0 ) {
    $padding = $phase;
  }

  # Move start and end into cdna coordinates (first and last base of the
  # respective codons).
  my $cdna_start =
    3*$start - 2 + ( $self->{'cdna_coding_start'} - 1 ) - $padding;
  my $cdna_end = 3*$end + ( $self->{'cdna_coding_start'} - 1 ) - $padding;

  return $self->cdna2genomic( $cdna_start, $cdna_end );
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 =head2 genomic2cds
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364 Arg [1] : int $start
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 The genomic start position
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 Arg [2] : int $end
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367 The genomic end position
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368 Arg [3] : int $strand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 The genomic strand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370 Example : @cds_coords = $trans_mapper->genomic2cds($start, $end, $strand);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371 Description: Converts genomic coordinates into CDS coordinates of the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372 transcript that was used to create this transcript mapper.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373 Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374 Bio::EnsEMBL::Mapper::Gap objects
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 Exceptions : throw if start, end or strand not defined, or if start > end + 1
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub genomic2cds {
  my ($self, $start, $end, $strand) = @_;

  # Converts a genomic region into CDS-relative coordinates.
  # Returns a list of Coordinate objects (rewritten in place to CDS
  # coordinates) and Gap objects for everything that falls outside the CDS.

  # All three genomic arguments are mandatory.
  unless (defined($start) && defined($end) && defined($strand)) {
    throw("start, end and strand arguments are required");
  }

  # start may exceed end by at most one.
  throw("start arg must be less than or equal to end arg + 1")
    if $start > $end + 1;

  my $coding_first = $self->{'cdna_coding_start'};
  my $coding_last  = $self->{'cdna_coding_end'};

  # No coding start recorded (pseudogene): there is no CDS, so the whole
  # requested region is returned as a single gap.
  return Bio::EnsEMBL::Mapper::Gap->new($start, $end)
    unless defined($coding_first);

  my @mapped;

  foreach my $segment ($self->genomic2cdna($start, $end, $strand)) {

    # Gaps coming back from the cDNA mapping are passed through untouched.
    if ($segment->isa('Bio::EnsEMBL::Mapper::Gap')) {
      push @mapped, $segment;
      next;
    }

    my $cdna_from = $segment->start;
    my $cdna_to   = $segment->end;

    # Reverse-strand hits and segments lying wholly outside the coding
    # range do not map to the CDS at all: emit one gap for the segment.
    if ($segment->strand == -1
        || $cdna_to < $coding_first
        || $cdna_from > $coding_last) {
      push @mapped, Bio::EnsEMBL::Mapper::Gap->new($cdna_from, $cdna_to);
      next;
    }

    # From here on the segment overlaps the CDS at least partially.
    my $cds_from = $cdna_from - $coding_first + 1;
    my $cds_to   = $cdna_to   - $coding_first + 1;

    # Leading part sits in the 5' UTR: split it off as a gap and clamp
    # the start to the first CDS base.
    if ($cdna_from < $coding_first) {
      push @mapped,
        Bio::EnsEMBL::Mapper::Gap->new($cdna_from, $coding_first - 1);
      $cds_from = 1;
    }

    # Trailing part sits in the 3' UTR: remember it as a gap (it must be
    # emitted after the coordinate) and clamp the end to the last CDS base.
    my $utr3_gap;
    if ($cdna_to > $coding_last) {
      $utr3_gap = Bio::EnsEMBL::Mapper::Gap->new($coding_last + 1, $cdna_to);
      $cds_to   = $coding_last - $coding_first + 1;
    }

    # The coordinate object is reused, now expressed relative to CDS start.
    $segment->start($cds_from);
    $segment->end($cds_to);

    push @mapped, $segment;
    push @mapped, $utr3_gap if $utr3_gap;
  }

  return @mapped;

}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456 =head2 genomic2pep
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
457
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
458 Arg [1] : $start
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
459 The start position in genomic coordinates
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
460 Arg [2] : $end
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
461 The end position in genomic coordinates
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
462 Arg [3] : $strand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
463 The strand of the genomic coordinates
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
464 Example : @pep_coords = $transcript->genomic2pep($start, $end, $strand);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
465 Description: Converts genomic coordinates to peptide coordinates. The
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
466 return value is a list of coordinates and gaps.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
467 Returntype : list of Bio::EnsEMBL::Mapper::Coordinate and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
468 Bio::EnsEMBL::Mapper::Gap objects
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
469 Exceptions : throw if start, end or strand not defined
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
470 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
471 Status : Stable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub genomic2pep {
  my ($self, $start, $end, $strand) = @_;

  # Converts a genomic region into peptide coordinates by first mapping to
  # CDS coordinates and then dividing down to residue positions.
  # Returns a list of Coordinate objects (rewritten in place to peptide
  # coordinates) and Gap objects (passed through unchanged).

  if (!defined($start) || !defined($end) || !defined($strand)) {
    throw("start, end and strand arguments are required");
  }

  my @cds_segments = $self->genomic2cds($start, $end, $strand);

  # A positive start phase means the CDS is N-padded at its beginning;
  # shift CDS positions by that amount before converting to residues.
  my $phase  = $self->{'start_phase'};
  my $offset = 0;
  $offset = $phase if $phase > 0;

  my @pep_coords;

  for my $piece (@cds_segments) {

    # Only real coordinates are converted; gaps fall straight through to
    # the push below.
    unless ($piece->isa('Bio::EnsEMBL::Mapper::Gap')) {
      # CDS position -> 1-based residue number (three bases per residue).
      my $first_residue = int(($piece->start + $offset + 2) / 3);
      my $last_residue  = int(($piece->end   + $offset + 2) / 3);

      $piece->start($first_residue);
      $piece->end($last_residue);
    }

    push @pep_coords, $piece;
  }

  return @pep_coords;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512 1;