comparison variant_effect_predictor/Bio/EnsEMBL/Analysis/PairAlign.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 PairAlign - Dna pairwise alignment module
24
25 =head1 SYNOPSIS
26
27 #To convert between coordinates:
28
29 my $cdna_coord = $pair->genomic2cDNA($gen_coord);
30 my $gen_coord = $pair->cDNA2genomic($cdna_coord);
31
32 =head1 DESCRIPTION
33
34 Contains list of sub alignments making up a dna-dna alignment
35
36 Creation:
37
38 my $pair = new Bio::EnsEMBL::FeaturePair(
39 -start => $qstart,
40 -end => $qend,
41 -strand => $qstrand,
42 -hstart => $hstart,
43 -hend => $hend,
44 -hend => $hstrand,
45 );
46
47 my $pairaln = new Bio::EnsEMBL::Analysis::PairAlign;
48 $pairaln->addFeaturePair($pair);
49
50 Any number of pair alignments can be added to the PairAlign object
51
52 =cut
53
54 package Bio::EnsEMBL::Analysis::PairAlign;
55
56 use vars qw(@ISA);
57 use strict;
58
59
60 @ISA = qw(Bio::EnsEMBL::Root);
61
62 sub new {
63 my($class,@args) = @_;
64 my $self = {};
65 bless $self, $class;
66
67 $self->{'_homol'} = [];
68
69 return $self; # success - we hope!
70 }
71
72 sub addFeaturePair {
73 my ($self,$pair) = @_;
74
75 $self->throw("Not a Bio::EnsEMBL::FeaturePair object") unless ($pair->isa("Bio::EnsEMBL::FeaturePair"));
76
77 push(@{$self->{'_pairs'}},$pair);
78
79 }
80
81
82 =head2 eachFeaturePair
83
84 Title : eachFeaturePair
85 Example : my @pairs = $pair->eachFeaturePair
86 Returns : Array of Bio::SeqFeature::FeaturePair
87 Args : none
88 =cut
89
90 sub eachFeaturePair {
91 my ($self) = @_;
92
93 if (defined($self->{'_pairs'})) {
94 return @{$self->{'_pairs'}};
95 }
96 }
97
98 sub get_hstrand {
99 my ($self) = @_;
100
101 my @features = $self->eachFeaturePair;
102
103 return $features[0]->hstrand;
104 }
105
106 =head2 genomic2cDNA
107
108 Title : genomic2cDNA
109 Usage : my $cdna_coord = $pair->genomic2cDNA($gen_coord)
110 Function: Converts a genomic coordinate to a cdna coordinate
111 Returns : int
112 Args : int
113 =cut
114
115 sub genomic2cDNA {
116 my ($self,$coord) = @_;
117 my @pairs = $self->eachFeaturePair;
118
119 @pairs = sort {$a->start <=> $b->start} @pairs;
120
121 my $newcoord;
122
123 HOMOL: while (my $sf1 = shift(@pairs)) {
124 next HOMOL unless ($coord >= $sf1->start && $coord <= $sf1->end);
125
126 if ($sf1->strand == 1 && $sf1->hstrand == 1) {
127 $newcoord = $sf1->hstart + ($coord - $sf1->start);
128 last HOMOL;
129 } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
130 $newcoord = $sf1->hend - ($coord - $sf1->start);
131 last HOMOL;
132 } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
133 $newcoord = $sf1->hstart + ($sf1->end - $coord);
134 last HOMOL;
135 } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
136 $newcoord = $sf1->hend - ($sf1->end - $coord);
137 last HOMOL;
138 } else {
139 $self->throw("ERROR: Wrong strand value in FeaturePair (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
140 }
141 }
142
143 if (defined($newcoord)) {
144
145 return $newcoord;
146 } else {
147 $self->throw("Couldn't convert $coord");
148 }
149 }
150
151 =head2 cDNA2genomic
152
153 Title : cDNA2genomic
154 Usage : my $gen_coord = $pair->genomic2cDNA($cdna_coord)
155 Function: Converts a cdna coordinate to a genomic coordinate
156 Example :
157 Returns : int
158 Args : int
159
160
161 =cut
162
163 sub cDNA2genomic {
164 my ($self,$coord) = @_;
165
166 my @pairs = $self->eachFeaturePair;
167
168 my $newcoord;
169
170 HOMOL: while (my $sf1 = shift(@pairs)) {
171 next HOMOL unless ($coord >= $sf1->hstart && $coord <= $sf1->hend);
172
173 if ($sf1->strand == 1 && $sf1->hstrand == 1) {
174 $newcoord = $sf1->start + ($coord - $sf1->hstart);
175 last HOMOL;
176 } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
177 $newcoord = $sf1->start +($sf1->hend - $coord);
178 last HOMOL;
179 } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
180 $newcoord = $sf1->end - ($coord - $sf1->hstart);
181 last HOMOL;
182 } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
183 $newcoord = $sf1->end - ($sf1->hend - $coord);
184 last HOMOL;
185 } else {
186 $self->throw("ERROR: Wrong strand value in homol (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
187 }
188 }
189
190 if (defined ($newcoord)) {
191 return $newcoord;
192 } else {
193 $self->throw("Couldn't convert $coord\n");
194 }
195 }
196
197 sub find_Pair {
198 my ($self,$coord) = @_;
199
200 foreach my $p ($self->eachFeaturePair) {
201 if ($coord >= $p->hstart && $coord <= $p->hend) {
202 return $p;
203 }
204 }
205 }
206
207 =head2 convert_cDNA_feature
208
209 Title : convert_cDNA_feature
210 Usage : my @newfeatures = $self->convert_cDNA_feature($f);
211 Function: Converts a feature on the cDNA into an array of
212 features on the genomic (for features that span across introns);
213 Returns : Array of Bio::EnsEMBL::FeaturePair
214 Args : Bio::EnsEMBL::FeaturePair
215
216 =cut
217
218 sub convert_cDNA_feature {
219 my ($self,$feature) = @_;
220
221 my $foundstart = 0;
222 my $foundend = 0;
223
224 my @pairs = $self->eachFeaturePair;
225 my @newfeatures;
226
227 HOMOL: while (my $sf1 = shift(@pairs)) {
228 my $skip = 0;
229 #print STDERR "Looking at cDNA exon " . $sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
230
231 $skip = 1 unless ($feature->start >= $sf1->hstart
232 && $feature->start <= $sf1->hend);
233
234 if($skip){
235 #print STDERR "Skipping ".$sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
236 next HOMOL;
237 }
238 if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
239 $foundend = 1;
240 }
241
242 my $startcoord = $self->cDNA2genomic($feature->start);
243 my $endcoord;
244
245 if ($sf1->hstrand == 1) {
246 $endcoord = $sf1->end;
247 } else {
248 $endcoord = $sf1->start;
249 }
250
251 if ($foundend) {
252 $endcoord = $self->cDNA2genomic($feature->end);
253 }
254
255 #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
256
257 my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
258 -start => $startcoord,
259 -end => $endcoord,
260 -strand => $feature->strand);
261 push(@newfeatures,$tmpf);
262 last;
263 }
264
265 # Now the rest of the pairs until we find the endcoord
266
267 while ((my $sf1 = shift(@pairs)) && ($foundend == 0)) {
268
269 if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
270 $foundend = 1;
271 }
272
273 my $startcoord;
274 my $endcoord;
275
276 if ($sf1->hstrand == 1) {
277 $startcoord = $sf1->start;
278 $endcoord = $sf1->end;
279 } else {
280 $startcoord = $sf1->end;
281 $endcoord = $sf1->start;
282 }
283
284 if ($foundend) {
285 $endcoord = $self->cDNA2genomic($feature->end);
286 }
287
288 # #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
289
290 my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
291 -start => $startcoord,
292 -end => $endcoord,
293 -strand => $feature->strand);
294 push(@newfeatures,$tmpf);
295 }
296 #print STDERR "Have ".@newfeatures." features from ".$feature."\n";
297 return @newfeatures;
298 }
299
300
301 sub convert_FeaturePair {
302 my ($self,$pair) = @_;
303
304 my $hstrand = $self->get_hstrand;
305
306 my $feat = $self->create_Feature($pair->start, $pair->end, $pair->strand,
307 $pair->slice);
308 my @newfeatures = $self->convert_cDNA_feature($feat);
309 my @newpairs;
310
311 my $hitpairaln = new Bio::EnsEMBL::Analysis::PairAlign;
312 $hitpairaln->addFeaturePair($pair);
313
314 foreach my $new (@newfeatures) {
315
316 # Now we want to convert these cDNA coords into hit coords
317
318 my $hstart1 = $self->genomic2cDNA($new->start);
319 my $hend1 = $self->genomic2cDNA($new->end);
320
321 my $hstart2 = $hitpairaln->genomic2cDNA($hstart1);
322 my $hend2 = $hitpairaln->genomic2cDNA($hend1);
323
324 # We can now put the final feature together
325
326 my $finalstrand = $hstrand * $pair->strand * $pair->hstrand;
327
328 if ($hstart2 > $hend2) {
329 my $tmp = $hstart2;
330 $hstart2 = $hend2;
331 $hend2 = $tmp;
332 }
333
334 my $finalpair = $self->create_FeaturePair($new->start, $new->end,
335 $new->strand,
336 $hstart2, $hend2,
337 $finalstrand, $pair->score);
338
339 push(@newpairs,$finalpair);
340
341 }
342
343 return @newpairs;
344 }
345
346 sub create_FeaturePair {
347 my ($self, $start, $end, $strand, $hstart, $hend,
348 $hstrand, $score) = @_;
349
350 my $fp = Bio::EnsEMBL::FeaturePair->new(
351 -start => $start,
352 -end => $end,
353 -strand => $strand,
354 -hstart => $hstart,
355 -hend => $hend,
356 -hstrand => $hstrand,
357 -score => $score,
358 );
359
360
361 return $fp;
362 }
363
364 sub create_Feature{
365 my ($self, $start, $end, $strand, $slice) = @_;
366
367 my $feat = new Bio::EnsEMBL::Feature(-start => $start,
368 -end => $end,
369 -strand => $strand,
370 -slice => $slice,
371 );
372 return $feat;
373 }
374
375 1;
376
377
378
379
380
381
382
383