0
|
1 =head1 NAME - Bio::EnsEMBL::Compara::PeptideAlignFeature
|
|
2
|
|
3 =head1 SYNOPSIS
|
|
4
|
|
5 # Get an $homology object somehow
|
|
6
|
|
7 # For Homology PeptideAlignFeatures, you normally get 2 pafs,
|
|
8 # one for each member used alternatively as query and database
|
|
9 # (hit) in the blast run
|
|
10
|
|
11 my $pafs = $homology->get_all_PeptideAlignFeature;
|
|
12
|
|
13 foreach my $paf (@{$pafs}) {
|
|
14 print $paf->query_member->stable_id," ",$self->hit_member->stable_id," ",$paf->evalue,"\n";
|
|
15 }
|
|
16
|
|
17 # Other stuff in the object:
|
|
18 # $paf->qstart
|
|
19 # $paf->qend
|
|
20 # $paf->hstart
|
|
21 # $paf->hend
|
|
22 # $paf->score
|
|
23 # $paf->alignment_length
|
|
24 # $paf->identical_matches
|
|
25 # $paf->perc_ident
|
|
26 # $paf->positive_matches
|
|
27 # $paf->perc_pos
|
|
28 # $paf->hit_rank
|
|
29 # $paf->cigar_line
|
|
30
|
|
31 =head1 DESCRIPTION
|
|
32
|
|
33 =head1 CONTACT
|
|
34
|
|
35 Describe contact details here
|
|
36
|
|
37 =head1 APPENDIX
|
|
38
|
|
39 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
40
|
|
41 =cut
|
|
42
|
|
43 my $_paf_build_homology_idx = time(); #global index counter
|
|
44
|
|
45 package Bio::EnsEMBL::Compara::PeptideAlignFeature;
|
|
46
|
|
47 use strict;
|
|
48 use Bio::EnsEMBL::Compara::Homology;
|
|
49 use Bio::EnsEMBL::Utils::Exception;
|
|
50
|
|
51 #se overload '<=>' => "sort_by_score_evalue_and_pid"; # named method
|
|
52
|
|
53 sub new {
|
|
54 my ($class) = @_;
|
|
55 my $self = {};
|
|
56
|
|
57 bless $self,$class;
|
|
58
|
|
59 $self->query_member(new Bio::EnsEMBL::Compara::Member);
|
|
60 $self->hit_member(new Bio::EnsEMBL::Compara::Member);
|
|
61 return $self;
|
|
62 }
|
|
63
|
|
64
|
|
65 sub create_homology
|
|
66 {
|
|
67 my $self = shift;
|
|
68
|
|
69 # create an Homology object
|
|
70 my $homology = new Bio::EnsEMBL::Compara::Homology;
|
|
71
|
|
72 my $stable_id;
|
|
73 if($self->query_member->taxon_id < $self->hit_member->taxon_id) {
|
|
74 $stable_id = $self->query_member->taxon_id() . "_" . $self->hit_member->taxon_id . "_";
|
|
75 } else {
|
|
76 $stable_id = $self->hit_member->taxon_id . "_" . $self->query_member->taxon_id . "_";
|
|
77 }
|
|
78 $stable_id .= sprintf ("%011.0d",$_paf_build_homology_idx++);
|
|
79 $homology->stable_id($stable_id);
|
|
80 $homology->method_link_type("ENSEMBL_ORTHOLOGUES");
|
|
81
|
|
82 #
|
|
83 # QUERY member
|
|
84 #
|
|
85 my $qlen = ($self->qend - $self->qstart + 1);
|
|
86 $self->query_member->perc_cov(int($qlen*100/$self->query_member->seq_length));
|
|
87 $self->query_member->perc_id(int($self->identical_matches*100.0/$qlen));
|
|
88 $self->query_member->perc_pos(int($self->positive_matches*100/$qlen));
|
|
89
|
|
90 my $cigar_line = $self->cigar_line;
|
|
91 #print("original cigar_line '$cigar_line'\n");
|
|
92 $cigar_line =~ s/I/M/g;
|
|
93 $cigar_line = compact_cigar_line($cigar_line);
|
|
94 $self->query_member->cigar_line($cigar_line);
|
|
95 #print(" '$cigar_line'\n");
|
|
96
|
|
97 $homology->add_Member($self->query_member);
|
|
98
|
|
99 # HIT member
|
|
100 #
|
|
101 # $self->hstart and $self->hend could be stored
|
|
102 my $hlen = ($self->hend - $self->hstart + 1);
|
|
103 $self->hit_member->perc_cov(int($hlen*100/$self->hit_member->seq_length));
|
|
104 $self->hit_member->perc_id(int($self->identical_matches*100.0/$hlen));
|
|
105 $self->hit_member->perc_pos(int($self->positive_matches*100/$hlen));
|
|
106
|
|
107 $cigar_line = $self->cigar_line;
|
|
108 #print("original cigar_line\n '$cigar_line'\n");
|
|
109 $cigar_line =~ s/D/M/g;
|
|
110 $cigar_line =~ s/I/D/g;
|
|
111 $cigar_line = compact_cigar_line($cigar_line);
|
|
112 $self->hit_member->cigar_line($cigar_line);
|
|
113 #print(" '$cigar_line'\n");
|
|
114
|
|
115 $homology->add_Member($self->hit_member);
|
|
116
|
|
117 return $homology;
|
|
118 }
|
|
119
|
|
120
|
|
121 sub compact_cigar_line
|
|
122 {
|
|
123 my $cigar_line = shift;
|
|
124
|
|
125 #print("cigar_line '$cigar_line' => ");
|
|
126 my @pieces = ( $cigar_line =~ /(\d*[MDI])/g );
|
|
127 my @new_pieces = ();
|
|
128 foreach my $piece (@pieces) {
|
|
129 $piece =~ s/I/M/;
|
|
130 if (! scalar @new_pieces || $piece =~ /D/) {
|
|
131 push @new_pieces, $piece;
|
|
132 next;
|
|
133 }
|
|
134 if ($piece =~ /\d*M/ && $new_pieces[-1] =~ /\d*M/) {
|
|
135 my ($matches1) = ($piece =~ /(\d*)M/);
|
|
136 my ($matches2) = ($new_pieces[-1] =~ /(\d*)M/);
|
|
137 if (! defined $matches1 || $matches1 eq "") {
|
|
138 $matches1 = 1;
|
|
139 }
|
|
140 if (! defined $matches2 || $matches2 eq "") {
|
|
141 $matches2 = 1;
|
|
142 }
|
|
143 $new_pieces[-1] = $matches1 + $matches2 . "M";
|
|
144 } else {
|
|
145 push @new_pieces, $piece;
|
|
146 }
|
|
147 }
|
|
148 my $new_cigar_line = join("", @new_pieces);
|
|
149 #print(" '$new_cigar_line'\n");
|
|
150 return $new_cigar_line;
|
|
151 }
|
|
152
|
|
153
|
|
154 ##########################
|
|
155 #
|
|
156 # getter/setter methods
|
|
157 #
|
|
158 ##########################
|
|
159
|
|
160 sub query_member {
|
|
161 my ($self,$arg) = @_;
|
|
162
|
|
163 if (defined($arg)) {
|
|
164 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]")
|
|
165 unless($arg->isa('Bio::EnsEMBL::Compara::Member'));
|
|
166 $self->{'_query_member'} = $arg;
|
|
167 }
|
|
168 return $self->{'_query_member'};
|
|
169 }
|
|
170
|
|
171 sub query_member_id {
|
|
172 my $self = shift;
|
|
173 $self->{'_query_member_id'} = shift if (@_);
|
|
174 if ($self->{'_query_member_id'}) {
|
|
175 return $self->{'_query_member_id'};
|
|
176 } elsif ($self->{'_query_member'} and $self->{'_query_member'}->dbID) {
|
|
177 return $self->{'_query_member'}->dbID;
|
|
178 }
|
|
179 return undef;
|
|
180 }
|
|
181
|
|
182 sub query_genome_db_id {
|
|
183 my $self = shift;
|
|
184 $self->{'_query_genome_db_id'} = shift if (@_);
|
|
185 if ($self->{'_query_genome_db_id'}) {
|
|
186 return $self->{'_query_genome_db_id'};
|
|
187 } elsif ($self->{'_query_member'} and $self->{'_query_member'}->genome_db
|
|
188 and $self->{'_query_member'}->genome_db->dbID) {
|
|
189 return $self->{'_query_member'}->genome_db->dbID;
|
|
190 }
|
|
191 return undef;
|
|
192 }
|
|
193
|
|
194 sub hit_member {
|
|
195 my ($self,$arg) = @_;
|
|
196
|
|
197 if (defined($arg)) {
|
|
198 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]")
|
|
199 unless($arg->isa('Bio::EnsEMBL::Compara::Member'));
|
|
200 $self->{'_hit_member'} = $arg;
|
|
201 }
|
|
202 return $self->{'_hit_member'};
|
|
203 }
|
|
204
|
|
205 sub hit_member_id {
|
|
206 my $self = shift;
|
|
207 $self->{'_hit_member_id'} = shift if (@_);
|
|
208 if ($self->{'_hit_member_id'}) {
|
|
209 return $self->{'_hit_member_id'};
|
|
210 } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->dbID) {
|
|
211 return $self->{'_hit_member'}->dbID;
|
|
212 }
|
|
213 return undef;
|
|
214 }
|
|
215
|
|
216 sub hit_genome_db_id {
|
|
217 my $self = shift;
|
|
218 $self->{'_hit_genome_db_id'} = shift if (@_);
|
|
219 if ($self->{'_hit_genome_db_id'}) {
|
|
220 return $self->{'_hit_genome_db_id'};
|
|
221 } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->genome_db
|
|
222 and $self->{'_hit_member'}->genome_db->dbID) {
|
|
223 return $self->{'_hit_member'}->genome_db->dbID;
|
|
224 }
|
|
225 return undef;
|
|
226 }
|
|
227
|
|
228 sub qstart {
|
|
229 my ($self,$arg) = @_;
|
|
230
|
|
231 if (defined($arg)) {
|
|
232 $self->{_qstart} = $arg;
|
|
233 }
|
|
234 return $self->{_qstart};
|
|
235 }
|
|
236
|
|
237 sub hstart {
|
|
238 my ($self,$arg) = @_;
|
|
239
|
|
240 if (defined($arg)) {
|
|
241 $self->{_hstart} = $arg;
|
|
242 }
|
|
243 return $self->{_hstart};
|
|
244 }
|
|
245
|
|
246 sub qend {
|
|
247 my ($self,$arg) = @_;
|
|
248
|
|
249 if (defined($arg)) {
|
|
250 $self->{_qend} = $arg;
|
|
251 }
|
|
252 return $self->{_qend};
|
|
253 }
|
|
254
|
|
255 sub qlength {
|
|
256 my ($self,$arg) = @_;
|
|
257
|
|
258 if (defined($arg)) {
|
|
259 $self->{_qlength} = $arg;
|
|
260 }
|
|
261 return $self->{_qlength};
|
|
262 }
|
|
263
|
|
264 sub hend {
|
|
265 my ($self,$arg) = @_;
|
|
266
|
|
267 if (defined($arg)) {
|
|
268 $self->{_hend} = $arg;
|
|
269 }
|
|
270 return $self->{_hend};
|
|
271 }
|
|
272
|
|
273 sub hlength{
|
|
274 my ($self,$arg) = @_;
|
|
275
|
|
276 if (defined($arg)) {
|
|
277 $self->{_hlength} = $arg;
|
|
278 }
|
|
279 return $self->{_hlength};
|
|
280 }
|
|
281
|
|
282 sub score{
|
|
283 my ($self,$arg) = @_;
|
|
284
|
|
285 if (defined($arg)) {
|
|
286 $self->{_score} = $arg;
|
|
287 }
|
|
288 return $self->{_score};
|
|
289 }
|
|
290
|
|
291 sub evalue {
|
|
292 my ($self,$arg) = @_;
|
|
293
|
|
294 if (defined($arg)) {
|
|
295 $self->{_evalue} = $arg;
|
|
296 }
|
|
297 return $self->{_evalue};
|
|
298 }
|
|
299
|
|
300 sub perc_ident {
|
|
301 my ($self,$arg) = @_;
|
|
302
|
|
303 if (defined($arg)) {
|
|
304 $self->{_perc_ident} = $arg;
|
|
305 }
|
|
306 return $self->{_perc_ident};
|
|
307 }
|
|
308
|
|
309 sub perc_pos {
|
|
310 my ($self,$arg) = @_;
|
|
311
|
|
312 if (defined($arg)) {
|
|
313 $self->{_perc_pos} = $arg;
|
|
314 }
|
|
315 return $self->{_perc_pos};
|
|
316 }
|
|
317
|
|
318 sub identical_matches {
|
|
319 my ($self,$arg) = @_;
|
|
320
|
|
321 if (defined($arg)) {
|
|
322 $self->{_identical_matches} = $arg;
|
|
323 if(defined($self->alignment_length)) {
|
|
324 $self->perc_ident(int($arg*100/$self->alignment_length));
|
|
325 }
|
|
326 }
|
|
327 return $self->{_identical_matches};
|
|
328 }
|
|
329
|
|
330 sub positive_matches {
|
|
331 my ($self,$arg) = @_;
|
|
332
|
|
333 if (defined($arg)) {
|
|
334 $self->{_positive_matches} = $arg;
|
|
335 if(defined($self->alignment_length)) {
|
|
336 $self->perc_pos(int($arg*100/$self->alignment_length));
|
|
337 }
|
|
338 }
|
|
339 return $self->{_positive_matches};
|
|
340 }
|
|
341
|
|
342 sub alignment_length {
|
|
343 my ($self,$arg) = @_;
|
|
344
|
|
345 if (defined($arg)) {
|
|
346 $self->{_alignment_length} = $arg;
|
|
347 }
|
|
348 return $self->{_alignment_length};
|
|
349 }
|
|
350
|
|
351 sub cigar_line {
|
|
352 my ($self,$arg) = @_;
|
|
353
|
|
354 if (defined($arg)) {
|
|
355 $self->{_cigar_line} = $arg;
|
|
356 }
|
|
357 return $self->{_cigar_line};
|
|
358 }
|
|
359
|
|
360 sub hit_rank {
|
|
361 my ($self,$arg) = @_;
|
|
362
|
|
363 if (defined($arg)) {
|
|
364 $self->{_hit_rank} = $arg;
|
|
365 }
|
|
366 return $self->{_hit_rank};
|
|
367 }
|
|
368
|
|
369 sub dbID {
|
|
370 my ( $self, $dbID ) = @_;
|
|
371 $self->{'_dbID'} = $dbID if defined $dbID;
|
|
372 return $self->{'_dbID'};
|
|
373 }
|
|
374
|
|
375 sub rhit_dbID {
|
|
376 my ( $self, $dbID ) = @_;
|
|
377 $self->{'_rhit_dbID'} = $dbID if defined $dbID;
|
|
378 return $self->{'_rhit_dbID'};
|
|
379 }
|
|
380
|
|
381 sub display_short {
|
|
382 my $self = shift;
|
|
383 print($self->get_description(), "\n");
|
|
384 }
|
|
385
|
|
386 sub get_description {
|
|
387 my($self) = @_;
|
|
388
|
|
389 unless(defined($self)) {
|
|
390 print("qy_stable_id\t\t\thit_stable_id\t\t\tscore\talen\t\%ident\t\%positive\thit_rank\n");
|
|
391 return;
|
|
392 }
|
|
393
|
|
394 my $qm = $self->query_member;
|
|
395 my $hm = $self->hit_member;
|
|
396 my $dbID = $self->dbID; $dbID = '' unless($dbID);
|
|
397
|
|
398 my $header = "PAF(".$dbID.")";
|
|
399 $header .= "(".$self->rhit_dbID.")" if($self->rhit_dbID);
|
|
400 while(length($header)<17) { $header .= ' '; }
|
|
401
|
|
402 my $qmem = sprintf("%s(%d,%d)(%s:%d)",
|
|
403 $qm->stable_id, $self->qstart, $self->qend, $qm->chr_name, $qm->chr_start);
|
|
404 my $hmem = sprintf("%s(%d,%d)(%s:%d)",
|
|
405 $hm->stable_id, $self->hstart, $self->hend, $hm->chr_name, $hm->chr_start);
|
|
406 while(length($qmem)<50) { $qmem .= ' '; }
|
|
407 while(length($hmem)<50) { $hmem .= ' '; }
|
|
408
|
|
409
|
|
410 my $desc_string = sprintf("%s%s%s%7.3f%7d%7d%7d%7d",
|
|
411 $header, $qmem, $hmem,
|
|
412 $self->score,
|
|
413 $self->alignment_length,
|
|
414 $self->perc_ident,
|
|
415 $self->perc_pos,
|
|
416 $self->hit_rank);
|
|
417
|
|
418 return $desc_string;
|
|
419 }
|
|
420
|
|
421
|
|
422 =head2 hash_key
|
|
423 Args : none
|
|
424 Example : $somehash->{$paf->hash_key} = $someValue;
|
|
425 Description: used for keeping track of known/stored gene/gene relationships
|
|
426 Returntype : string $key
|
|
427 Exceptions : none
|
|
428 Caller : general
|
|
429 =cut
|
|
430
|
|
431 sub hash_key
|
|
432 {
|
|
433 my $self = shift;
|
|
434 my $key = '1';
|
|
435
|
|
436 return $key unless($self->query_member);
|
|
437 return $key unless($self->hit_member);
|
|
438 my $gene1 = $self->query_member->gene_member;
|
|
439 my $gene2 = $self->hit_member->gene_member;
|
|
440 $gene1 = $self->query_member unless($gene1);
|
|
441 $gene2 = $self->hit_member unless($gene2);
|
|
442 if($gene1->genome_db_id > $gene2->genome_db_id) {
|
|
443 my $temp = $gene1;
|
|
444 $gene1 = $gene2;
|
|
445 $gene2 = $temp;
|
|
446 }
|
|
447 $key = $gene1->stable_id . '_' . $gene2->stable_id;
|
|
448 return $key;
|
|
449 }
|
|
450
|
|
451 1;
|