Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Analysis/PairAlign.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =cut | |
20 | |
21 =head1 NAME | |
22 | |
23 PairAlign - Dna pairwise alignment module | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 #To convert between coordinates: | |
28 | |
29 my $cdna_coord = $pair->genomic2cDNA($gen_coord); | |
30 my $gen_coord = $pair->cDNA2genomic($cdna_coord); | |
31 | |
32 =head1 DESCRIPTION | |
33 | |
34 Contains list of sub alignments making up a dna-dna alignment | |
35 | |
36 Creation: | |
37 | |
38 my $pair = new Bio::EnsEMBL::FeaturePair( | |
39 -start => $qstart, | |
40 -end => $qend, | |
41 -strand => $qstrand, | |
42 -hstart => $hstart, | |
43 -hend => $hend, | |
44 -hend => $hstrand, | |
45 ); | |
46 | |
47 my $pairaln = new Bio::EnsEMBL::Analysis::PairAlign; | |
48 $pairaln->addFeaturePair($pair); | |
49 | |
50 Any number of pair alignments can be added to the PairAlign object | |
51 | |
52 =cut | |
53 | |
54 package Bio::EnsEMBL::Analysis::PairAlign; | |
55 | |
56 use vars qw(@ISA); | |
57 use strict; | |
58 | |
59 | |
60 @ISA = qw(Bio::EnsEMBL::Root); | |
61 | |
62 sub new { | |
63 my($class,@args) = @_; | |
64 my $self = {}; | |
65 bless $self, $class; | |
66 | |
67 $self->{'_homol'} = []; | |
68 | |
69 return $self; # success - we hope! | |
70 } | |
71 | |
72 sub addFeaturePair { | |
73 my ($self,$pair) = @_; | |
74 | |
75 $self->throw("Not a Bio::EnsEMBL::FeaturePair object") unless ($pair->isa("Bio::EnsEMBL::FeaturePair")); | |
76 | |
77 push(@{$self->{'_pairs'}},$pair); | |
78 | |
79 } | |
80 | |
81 | |
82 =head2 eachFeaturePair | |
83 | |
84 Title : eachFeaturePair | |
85 Example : my @pairs = $pair->eachFeaturePair | |
86 Returns : Array of Bio::SeqFeature::FeaturePair | |
87 Args : none | |
88 =cut | |
89 | |
90 sub eachFeaturePair { | |
91 my ($self) = @_; | |
92 | |
93 if (defined($self->{'_pairs'})) { | |
94 return @{$self->{'_pairs'}}; | |
95 } | |
96 } | |
97 | |
98 sub get_hstrand { | |
99 my ($self) = @_; | |
100 | |
101 my @features = $self->eachFeaturePair; | |
102 | |
103 return $features[0]->hstrand; | |
104 } | |
105 | |
106 =head2 genomic2cDNA | |
107 | |
108 Title : genomic2cDNA | |
109 Usage : my $cdna_coord = $pair->genomic2cDNA($gen_coord) | |
110 Function: Converts a genomic coordinate to a cdna coordinate | |
111 Returns : int | |
112 Args : int | |
113 =cut | |
114 | |
115 sub genomic2cDNA { | |
116 my ($self,$coord) = @_; | |
117 my @pairs = $self->eachFeaturePair; | |
118 | |
119 @pairs = sort {$a->start <=> $b->start} @pairs; | |
120 | |
121 my $newcoord; | |
122 | |
123 HOMOL: while (my $sf1 = shift(@pairs)) { | |
124 next HOMOL unless ($coord >= $sf1->start && $coord <= $sf1->end); | |
125 | |
126 if ($sf1->strand == 1 && $sf1->hstrand == 1) { | |
127 $newcoord = $sf1->hstart + ($coord - $sf1->start); | |
128 last HOMOL; | |
129 } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) { | |
130 $newcoord = $sf1->hend - ($coord - $sf1->start); | |
131 last HOMOL; | |
132 } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) { | |
133 $newcoord = $sf1->hstart + ($sf1->end - $coord); | |
134 last HOMOL; | |
135 } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) { | |
136 $newcoord = $sf1->hend - ($sf1->end - $coord); | |
137 last HOMOL; | |
138 } else { | |
139 $self->throw("ERROR: Wrong strand value in FeaturePair (" . $sf1->strand . "/" . $sf1->hstrand . "\n"); | |
140 } | |
141 } | |
142 | |
143 if (defined($newcoord)) { | |
144 | |
145 return $newcoord; | |
146 } else { | |
147 $self->throw("Couldn't convert $coord"); | |
148 } | |
149 } | |
150 | |
151 =head2 cDNA2genomic | |
152 | |
153 Title : cDNA2genomic | |
154 Usage : my $gen_coord = $pair->genomic2cDNA($cdna_coord) | |
155 Function: Converts a cdna coordinate to a genomic coordinate | |
156 Example : | |
157 Returns : int | |
158 Args : int | |
159 | |
160 | |
161 =cut | |
162 | |
163 sub cDNA2genomic { | |
164 my ($self,$coord) = @_; | |
165 | |
166 my @pairs = $self->eachFeaturePair; | |
167 | |
168 my $newcoord; | |
169 | |
170 HOMOL: while (my $sf1 = shift(@pairs)) { | |
171 next HOMOL unless ($coord >= $sf1->hstart && $coord <= $sf1->hend); | |
172 | |
173 if ($sf1->strand == 1 && $sf1->hstrand == 1) { | |
174 $newcoord = $sf1->start + ($coord - $sf1->hstart); | |
175 last HOMOL; | |
176 } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) { | |
177 $newcoord = $sf1->start +($sf1->hend - $coord); | |
178 last HOMOL; | |
179 } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) { | |
180 $newcoord = $sf1->end - ($coord - $sf1->hstart); | |
181 last HOMOL; | |
182 } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) { | |
183 $newcoord = $sf1->end - ($sf1->hend - $coord); | |
184 last HOMOL; | |
185 } else { | |
186 $self->throw("ERROR: Wrong strand value in homol (" . $sf1->strand . "/" . $sf1->hstrand . "\n"); | |
187 } | |
188 } | |
189 | |
190 if (defined ($newcoord)) { | |
191 return $newcoord; | |
192 } else { | |
193 $self->throw("Couldn't convert $coord\n"); | |
194 } | |
195 } | |
196 | |
197 sub find_Pair { | |
198 my ($self,$coord) = @_; | |
199 | |
200 foreach my $p ($self->eachFeaturePair) { | |
201 if ($coord >= $p->hstart && $coord <= $p->hend) { | |
202 return $p; | |
203 } | |
204 } | |
205 } | |
206 | |
207 =head2 convert_cDNA_feature | |
208 | |
209 Title : convert_cDNA_feature | |
210 Usage : my @newfeatures = $self->convert_cDNA_feature($f); | |
211 Function: Converts a feature on the cDNA into an array of | |
212 features on the genomic (for features that span across introns); | |
213 Returns : Array of Bio::EnsEMBL::FeaturePair | |
214 Args : Bio::EnsEMBL::FeaturePair | |
215 | |
216 =cut | |
217 | |
218 sub convert_cDNA_feature { | |
219 my ($self,$feature) = @_; | |
220 | |
221 my $foundstart = 0; | |
222 my $foundend = 0; | |
223 | |
224 my @pairs = $self->eachFeaturePair; | |
225 my @newfeatures; | |
226 | |
227 HOMOL: while (my $sf1 = shift(@pairs)) { | |
228 my $skip = 0; | |
229 #print STDERR "Looking at cDNA exon " . $sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n"; | |
230 | |
231 $skip = 1 unless ($feature->start >= $sf1->hstart | |
232 && $feature->start <= $sf1->hend); | |
233 | |
234 if($skip){ | |
235 #print STDERR "Skipping ".$sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n"; | |
236 next HOMOL; | |
237 } | |
238 if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) { | |
239 $foundend = 1; | |
240 } | |
241 | |
242 my $startcoord = $self->cDNA2genomic($feature->start); | |
243 my $endcoord; | |
244 | |
245 if ($sf1->hstrand == 1) { | |
246 $endcoord = $sf1->end; | |
247 } else { | |
248 $endcoord = $sf1->start; | |
249 } | |
250 | |
251 if ($foundend) { | |
252 $endcoord = $self->cDNA2genomic($feature->end); | |
253 } | |
254 | |
255 #print STDERR "Making new genomic feature $startcoord\t$endcoord\n"; | |
256 | |
257 my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname, | |
258 -start => $startcoord, | |
259 -end => $endcoord, | |
260 -strand => $feature->strand); | |
261 push(@newfeatures,$tmpf); | |
262 last; | |
263 } | |
264 | |
265 # Now the rest of the pairs until we find the endcoord | |
266 | |
267 while ((my $sf1 = shift(@pairs)) && ($foundend == 0)) { | |
268 | |
269 if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) { | |
270 $foundend = 1; | |
271 } | |
272 | |
273 my $startcoord; | |
274 my $endcoord; | |
275 | |
276 if ($sf1->hstrand == 1) { | |
277 $startcoord = $sf1->start; | |
278 $endcoord = $sf1->end; | |
279 } else { | |
280 $startcoord = $sf1->end; | |
281 $endcoord = $sf1->start; | |
282 } | |
283 | |
284 if ($foundend) { | |
285 $endcoord = $self->cDNA2genomic($feature->end); | |
286 } | |
287 | |
288 # #print STDERR "Making new genomic feature $startcoord\t$endcoord\n"; | |
289 | |
290 my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname, | |
291 -start => $startcoord, | |
292 -end => $endcoord, | |
293 -strand => $feature->strand); | |
294 push(@newfeatures,$tmpf); | |
295 } | |
296 #print STDERR "Have ".@newfeatures." features from ".$feature."\n"; | |
297 return @newfeatures; | |
298 } | |
299 | |
300 | |
301 sub convert_FeaturePair { | |
302 my ($self,$pair) = @_; | |
303 | |
304 my $hstrand = $self->get_hstrand; | |
305 | |
306 my $feat = $self->create_Feature($pair->start, $pair->end, $pair->strand, | |
307 $pair->slice); | |
308 my @newfeatures = $self->convert_cDNA_feature($feat); | |
309 my @newpairs; | |
310 | |
311 my $hitpairaln = new Bio::EnsEMBL::Analysis::PairAlign; | |
312 $hitpairaln->addFeaturePair($pair); | |
313 | |
314 foreach my $new (@newfeatures) { | |
315 | |
316 # Now we want to convert these cDNA coords into hit coords | |
317 | |
318 my $hstart1 = $self->genomic2cDNA($new->start); | |
319 my $hend1 = $self->genomic2cDNA($new->end); | |
320 | |
321 my $hstart2 = $hitpairaln->genomic2cDNA($hstart1); | |
322 my $hend2 = $hitpairaln->genomic2cDNA($hend1); | |
323 | |
324 # We can now put the final feature together | |
325 | |
326 my $finalstrand = $hstrand * $pair->strand * $pair->hstrand; | |
327 | |
328 if ($hstart2 > $hend2) { | |
329 my $tmp = $hstart2; | |
330 $hstart2 = $hend2; | |
331 $hend2 = $tmp; | |
332 } | |
333 | |
334 my $finalpair = $self->create_FeaturePair($new->start, $new->end, | |
335 $new->strand, | |
336 $hstart2, $hend2, | |
337 $finalstrand, $pair->score); | |
338 | |
339 push(@newpairs,$finalpair); | |
340 | |
341 } | |
342 | |
343 return @newpairs; | |
344 } | |
345 | |
346 sub create_FeaturePair { | |
347 my ($self, $start, $end, $strand, $hstart, $hend, | |
348 $hstrand, $score) = @_; | |
349 | |
350 my $fp = Bio::EnsEMBL::FeaturePair->new( | |
351 -start => $start, | |
352 -end => $end, | |
353 -strand => $strand, | |
354 -hstart => $hstart, | |
355 -hend => $hend, | |
356 -hstrand => $hstrand, | |
357 -score => $score, | |
358 ); | |
359 | |
360 | |
361 return $fp; | |
362 } | |
363 | |
364 sub create_Feature{ | |
365 my ($self, $start, $end, $strand, $slice) = @_; | |
366 | |
367 my $feat = new Bio::EnsEMBL::Feature(-start => $start, | |
368 -end => $end, | |
369 -strand => $strand, | |
370 -slice => $slice, | |
371 ); | |
372 return $feat; | |
373 } | |
374 | |
375 1; | |
376 | |
377 | |
378 | |
379 | |
380 | |
381 | |
382 | |
383 |