0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 PairAlign - Dna pairwise alignment module
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 #To convert between coordinates:
|
|
28
|
|
29 my $cdna_coord = $pair->genomic2cDNA($gen_coord);
|
|
30 my $gen_coord = $pair->cDNA2genomic($cdna_coord);
|
|
31
|
|
32 =head1 DESCRIPTION
|
|
33
|
|
34 Contains list of sub alignments making up a dna-dna alignment
|
|
35
|
|
36 Creation:
|
|
37
|
|
38 my $pair = new Bio::EnsEMBL::FeaturePair(
|
|
39 -start => $qstart,
|
|
40 -end => $qend,
|
|
41 -strand => $qstrand,
|
|
42 -hstart => $hstart,
|
|
43 -hend => $hend,
|
|
44 -hend => $hstrand,
|
|
45 );
|
|
46
|
|
47 my $pairaln = new Bio::EnsEMBL::Analysis::PairAlign;
|
|
48 $pairaln->addFeaturePair($pair);
|
|
49
|
|
50 Any number of pair alignments can be added to the PairAlign object
|
|
51
|
|
52 =cut
|
|
53
|
|
54 package Bio::EnsEMBL::Analysis::PairAlign;
|
|
55
|
|
56 use vars qw(@ISA);
|
|
57 use strict;
|
|
58
|
|
59
|
|
60 @ISA = qw(Bio::EnsEMBL::Root);
|
|
61
|
|
62 sub new {
|
|
63 my($class,@args) = @_;
|
|
64 my $self = {};
|
|
65 bless $self, $class;
|
|
66
|
|
67 $self->{'_homol'} = [];
|
|
68
|
|
69 return $self; # success - we hope!
|
|
70 }
|
|
71
|
|
72 sub addFeaturePair {
|
|
73 my ($self,$pair) = @_;
|
|
74
|
|
75 $self->throw("Not a Bio::EnsEMBL::FeaturePair object") unless ($pair->isa("Bio::EnsEMBL::FeaturePair"));
|
|
76
|
|
77 push(@{$self->{'_pairs'}},$pair);
|
|
78
|
|
79 }
|
|
80
|
|
81
|
|
82 =head2 eachFeaturePair
|
|
83
|
|
84 Title : eachFeaturePair
|
|
85 Example : my @pairs = $pair->eachFeaturePair
|
|
86 Returns : Array of Bio::SeqFeature::FeaturePair
|
|
87 Args : none
|
|
88 =cut
|
|
89
|
|
90 sub eachFeaturePair {
|
|
91 my ($self) = @_;
|
|
92
|
|
93 if (defined($self->{'_pairs'})) {
|
|
94 return @{$self->{'_pairs'}};
|
|
95 }
|
|
96 }
|
|
97
|
|
98 sub get_hstrand {
|
|
99 my ($self) = @_;
|
|
100
|
|
101 my @features = $self->eachFeaturePair;
|
|
102
|
|
103 return $features[0]->hstrand;
|
|
104 }
|
|
105
|
|
106 =head2 genomic2cDNA
|
|
107
|
|
108 Title : genomic2cDNA
|
|
109 Usage : my $cdna_coord = $pair->genomic2cDNA($gen_coord)
|
|
110 Function: Converts a genomic coordinate to a cdna coordinate
|
|
111 Returns : int
|
|
112 Args : int
|
|
113 =cut
|
|
114
|
|
115 sub genomic2cDNA {
|
|
116 my ($self,$coord) = @_;
|
|
117 my @pairs = $self->eachFeaturePair;
|
|
118
|
|
119 @pairs = sort {$a->start <=> $b->start} @pairs;
|
|
120
|
|
121 my $newcoord;
|
|
122
|
|
123 HOMOL: while (my $sf1 = shift(@pairs)) {
|
|
124 next HOMOL unless ($coord >= $sf1->start && $coord <= $sf1->end);
|
|
125
|
|
126 if ($sf1->strand == 1 && $sf1->hstrand == 1) {
|
|
127 $newcoord = $sf1->hstart + ($coord - $sf1->start);
|
|
128 last HOMOL;
|
|
129 } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
|
|
130 $newcoord = $sf1->hend - ($coord - $sf1->start);
|
|
131 last HOMOL;
|
|
132 } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
|
|
133 $newcoord = $sf1->hstart + ($sf1->end - $coord);
|
|
134 last HOMOL;
|
|
135 } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
|
|
136 $newcoord = $sf1->hend - ($sf1->end - $coord);
|
|
137 last HOMOL;
|
|
138 } else {
|
|
139 $self->throw("ERROR: Wrong strand value in FeaturePair (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
|
|
140 }
|
|
141 }
|
|
142
|
|
143 if (defined($newcoord)) {
|
|
144
|
|
145 return $newcoord;
|
|
146 } else {
|
|
147 $self->throw("Couldn't convert $coord");
|
|
148 }
|
|
149 }
|
|
150
|
|
151 =head2 cDNA2genomic
|
|
152
|
|
153 Title : cDNA2genomic
|
|
154 Usage : my $gen_coord = $pair->genomic2cDNA($cdna_coord)
|
|
155 Function: Converts a cdna coordinate to a genomic coordinate
|
|
156 Example :
|
|
157 Returns : int
|
|
158 Args : int
|
|
159
|
|
160
|
|
161 =cut
|
|
162
|
|
163 sub cDNA2genomic {
|
|
164 my ($self,$coord) = @_;
|
|
165
|
|
166 my @pairs = $self->eachFeaturePair;
|
|
167
|
|
168 my $newcoord;
|
|
169
|
|
170 HOMOL: while (my $sf1 = shift(@pairs)) {
|
|
171 next HOMOL unless ($coord >= $sf1->hstart && $coord <= $sf1->hend);
|
|
172
|
|
173 if ($sf1->strand == 1 && $sf1->hstrand == 1) {
|
|
174 $newcoord = $sf1->start + ($coord - $sf1->hstart);
|
|
175 last HOMOL;
|
|
176 } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
|
|
177 $newcoord = $sf1->start +($sf1->hend - $coord);
|
|
178 last HOMOL;
|
|
179 } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
|
|
180 $newcoord = $sf1->end - ($coord - $sf1->hstart);
|
|
181 last HOMOL;
|
|
182 } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
|
|
183 $newcoord = $sf1->end - ($sf1->hend - $coord);
|
|
184 last HOMOL;
|
|
185 } else {
|
|
186 $self->throw("ERROR: Wrong strand value in homol (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
|
|
187 }
|
|
188 }
|
|
189
|
|
190 if (defined ($newcoord)) {
|
|
191 return $newcoord;
|
|
192 } else {
|
|
193 $self->throw("Couldn't convert $coord\n");
|
|
194 }
|
|
195 }
|
|
196
|
|
197 sub find_Pair {
|
|
198 my ($self,$coord) = @_;
|
|
199
|
|
200 foreach my $p ($self->eachFeaturePair) {
|
|
201 if ($coord >= $p->hstart && $coord <= $p->hend) {
|
|
202 return $p;
|
|
203 }
|
|
204 }
|
|
205 }
|
|
206
|
|
207 =head2 convert_cDNA_feature
|
|
208
|
|
209 Title : convert_cDNA_feature
|
|
210 Usage : my @newfeatures = $self->convert_cDNA_feature($f);
|
|
211 Function: Converts a feature on the cDNA into an array of
|
|
212 features on the genomic (for features that span across introns);
|
|
213 Returns : Array of Bio::EnsEMBL::FeaturePair
|
|
214 Args : Bio::EnsEMBL::FeaturePair
|
|
215
|
|
216 =cut
|
|
217
|
|
218 sub convert_cDNA_feature {
|
|
219 my ($self,$feature) = @_;
|
|
220
|
|
221 my $foundstart = 0;
|
|
222 my $foundend = 0;
|
|
223
|
|
224 my @pairs = $self->eachFeaturePair;
|
|
225 my @newfeatures;
|
|
226
|
|
227 HOMOL: while (my $sf1 = shift(@pairs)) {
|
|
228 my $skip = 0;
|
|
229 #print STDERR "Looking at cDNA exon " . $sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
|
|
230
|
|
231 $skip = 1 unless ($feature->start >= $sf1->hstart
|
|
232 && $feature->start <= $sf1->hend);
|
|
233
|
|
234 if($skip){
|
|
235 #print STDERR "Skipping ".$sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
|
|
236 next HOMOL;
|
|
237 }
|
|
238 if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
|
|
239 $foundend = 1;
|
|
240 }
|
|
241
|
|
242 my $startcoord = $self->cDNA2genomic($feature->start);
|
|
243 my $endcoord;
|
|
244
|
|
245 if ($sf1->hstrand == 1) {
|
|
246 $endcoord = $sf1->end;
|
|
247 } else {
|
|
248 $endcoord = $sf1->start;
|
|
249 }
|
|
250
|
|
251 if ($foundend) {
|
|
252 $endcoord = $self->cDNA2genomic($feature->end);
|
|
253 }
|
|
254
|
|
255 #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
|
|
256
|
|
257 my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
|
|
258 -start => $startcoord,
|
|
259 -end => $endcoord,
|
|
260 -strand => $feature->strand);
|
|
261 push(@newfeatures,$tmpf);
|
|
262 last;
|
|
263 }
|
|
264
|
|
265 # Now the rest of the pairs until we find the endcoord
|
|
266
|
|
267 while ((my $sf1 = shift(@pairs)) && ($foundend == 0)) {
|
|
268
|
|
269 if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
|
|
270 $foundend = 1;
|
|
271 }
|
|
272
|
|
273 my $startcoord;
|
|
274 my $endcoord;
|
|
275
|
|
276 if ($sf1->hstrand == 1) {
|
|
277 $startcoord = $sf1->start;
|
|
278 $endcoord = $sf1->end;
|
|
279 } else {
|
|
280 $startcoord = $sf1->end;
|
|
281 $endcoord = $sf1->start;
|
|
282 }
|
|
283
|
|
284 if ($foundend) {
|
|
285 $endcoord = $self->cDNA2genomic($feature->end);
|
|
286 }
|
|
287
|
|
288 # #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
|
|
289
|
|
290 my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
|
|
291 -start => $startcoord,
|
|
292 -end => $endcoord,
|
|
293 -strand => $feature->strand);
|
|
294 push(@newfeatures,$tmpf);
|
|
295 }
|
|
296 #print STDERR "Have ".@newfeatures." features from ".$feature."\n";
|
|
297 return @newfeatures;
|
|
298 }
|
|
299
|
|
300
|
|
301 sub convert_FeaturePair {
|
|
302 my ($self,$pair) = @_;
|
|
303
|
|
304 my $hstrand = $self->get_hstrand;
|
|
305
|
|
306 my $feat = $self->create_Feature($pair->start, $pair->end, $pair->strand,
|
|
307 $pair->slice);
|
|
308 my @newfeatures = $self->convert_cDNA_feature($feat);
|
|
309 my @newpairs;
|
|
310
|
|
311 my $hitpairaln = new Bio::EnsEMBL::Analysis::PairAlign;
|
|
312 $hitpairaln->addFeaturePair($pair);
|
|
313
|
|
314 foreach my $new (@newfeatures) {
|
|
315
|
|
316 # Now we want to convert these cDNA coords into hit coords
|
|
317
|
|
318 my $hstart1 = $self->genomic2cDNA($new->start);
|
|
319 my $hend1 = $self->genomic2cDNA($new->end);
|
|
320
|
|
321 my $hstart2 = $hitpairaln->genomic2cDNA($hstart1);
|
|
322 my $hend2 = $hitpairaln->genomic2cDNA($hend1);
|
|
323
|
|
324 # We can now put the final feature together
|
|
325
|
|
326 my $finalstrand = $hstrand * $pair->strand * $pair->hstrand;
|
|
327
|
|
328 if ($hstart2 > $hend2) {
|
|
329 my $tmp = $hstart2;
|
|
330 $hstart2 = $hend2;
|
|
331 $hend2 = $tmp;
|
|
332 }
|
|
333
|
|
334 my $finalpair = $self->create_FeaturePair($new->start, $new->end,
|
|
335 $new->strand,
|
|
336 $hstart2, $hend2,
|
|
337 $finalstrand, $pair->score);
|
|
338
|
|
339 push(@newpairs,$finalpair);
|
|
340
|
|
341 }
|
|
342
|
|
343 return @newpairs;
|
|
344 }
|
|
345
|
|
346 sub create_FeaturePair {
|
|
347 my ($self, $start, $end, $strand, $hstart, $hend,
|
|
348 $hstrand, $score) = @_;
|
|
349
|
|
350 my $fp = Bio::EnsEMBL::FeaturePair->new(
|
|
351 -start => $start,
|
|
352 -end => $end,
|
|
353 -strand => $strand,
|
|
354 -hstart => $hstart,
|
|
355 -hend => $hend,
|
|
356 -hstrand => $hstrand,
|
|
357 -score => $score,
|
|
358 );
|
|
359
|
|
360
|
|
361 return $fp;
|
|
362 }
|
|
363
|
|
364 sub create_Feature{
|
|
365 my ($self, $start, $end, $strand, $slice) = @_;
|
|
366
|
|
367 my $feat = new Bio::EnsEMBL::Feature(-start => $start,
|
|
368 -end => $end,
|
|
369 -strand => $strand,
|
|
370 -slice => $slice,
|
|
371 );
|
|
372 return $feat;
|
|
373 }
|
|
374
|
|
375 1;
|
|
376
|
|
377
|
|
378
|
|
379
|
|
380
|
|
381
|
|
382
|
|
383
|