comparison variant_effect_predictor/Bio/EnsEMBL/Compara/PeptideAlignFeature.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 NAME - Bio::EnsEMBL::Compara::PeptideAlignFeature
2
3 =head1 SYNOPSIS
4
5 # Get an $homology object somehow
6
7 # For Homology PeptideAlignFeatures, you normally get 2 pafs,
8 # one for each member used alternatively as query and database
9 # (hit) in the blast run
10
11 my $pafs = $homology->get_all_PeptideAlignFeature;
12
13 foreach my $paf (@{$pafs}) {
14 print $paf->query_member->stable_id," ",$self->hit_member->stable_id," ",$paf->evalue,"\n";
15 }
16
17 # Other stuff in the object:
18 # $paf->qstart
19 # $paf->qend
20 # $paf->hstart
21 # $paf->hend
22 # $paf->score
23 # $paf->alignment_length
24 # $paf->identical_matches
25 # $paf->perc_ident
26 # $paf->positive_matches
27 # $paf->perc_pos
28 # $paf->hit_rank
29 # $paf->cigar_line
30
31 =head1 DESCRIPTION
32
33 =head1 CONTACT
34
35 Describe contact details here
36
37 =head1 APPENDIX
38
39 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
40
41 =cut
42
43 my $_paf_build_homology_idx = time(); #global index counter
44
45 package Bio::EnsEMBL::Compara::PeptideAlignFeature;
46
47 use strict;
48 use Bio::EnsEMBL::Compara::Homology;
49 use Bio::EnsEMBL::Utils::Exception;
50
51 #se overload '<=>' => "sort_by_score_evalue_and_pid"; # named method
52
53 sub new {
54 my ($class) = @_;
55 my $self = {};
56
57 bless $self,$class;
58
59 $self->query_member(new Bio::EnsEMBL::Compara::Member);
60 $self->hit_member(new Bio::EnsEMBL::Compara::Member);
61 return $self;
62 }
63
64
65 sub create_homology
66 {
67 my $self = shift;
68
69 # create an Homology object
70 my $homology = new Bio::EnsEMBL::Compara::Homology;
71
72 my $stable_id;
73 if($self->query_member->taxon_id < $self->hit_member->taxon_id) {
74 $stable_id = $self->query_member->taxon_id() . "_" . $self->hit_member->taxon_id . "_";
75 } else {
76 $stable_id = $self->hit_member->taxon_id . "_" . $self->query_member->taxon_id . "_";
77 }
78 $stable_id .= sprintf ("%011.0d",$_paf_build_homology_idx++);
79 $homology->stable_id($stable_id);
80 $homology->method_link_type("ENSEMBL_ORTHOLOGUES");
81
82 #
83 # QUERY member
84 #
85 my $qlen = ($self->qend - $self->qstart + 1);
86 $self->query_member->perc_cov(int($qlen*100/$self->query_member->seq_length));
87 $self->query_member->perc_id(int($self->identical_matches*100.0/$qlen));
88 $self->query_member->perc_pos(int($self->positive_matches*100/$qlen));
89
90 my $cigar_line = $self->cigar_line;
91 #print("original cigar_line '$cigar_line'\n");
92 $cigar_line =~ s/I/M/g;
93 $cigar_line = compact_cigar_line($cigar_line);
94 $self->query_member->cigar_line($cigar_line);
95 #print(" '$cigar_line'\n");
96
97 $homology->add_Member($self->query_member);
98
99 # HIT member
100 #
101 # $self->hstart and $self->hend could be stored
102 my $hlen = ($self->hend - $self->hstart + 1);
103 $self->hit_member->perc_cov(int($hlen*100/$self->hit_member->seq_length));
104 $self->hit_member->perc_id(int($self->identical_matches*100.0/$hlen));
105 $self->hit_member->perc_pos(int($self->positive_matches*100/$hlen));
106
107 $cigar_line = $self->cigar_line;
108 #print("original cigar_line\n '$cigar_line'\n");
109 $cigar_line =~ s/D/M/g;
110 $cigar_line =~ s/I/D/g;
111 $cigar_line = compact_cigar_line($cigar_line);
112 $self->hit_member->cigar_line($cigar_line);
113 #print(" '$cigar_line'\n");
114
115 $homology->add_Member($self->hit_member);
116
117 return $homology;
118 }
119
120
121 sub compact_cigar_line
122 {
123 my $cigar_line = shift;
124
125 #print("cigar_line '$cigar_line' => ");
126 my @pieces = ( $cigar_line =~ /(\d*[MDI])/g );
127 my @new_pieces = ();
128 foreach my $piece (@pieces) {
129 $piece =~ s/I/M/;
130 if (! scalar @new_pieces || $piece =~ /D/) {
131 push @new_pieces, $piece;
132 next;
133 }
134 if ($piece =~ /\d*M/ && $new_pieces[-1] =~ /\d*M/) {
135 my ($matches1) = ($piece =~ /(\d*)M/);
136 my ($matches2) = ($new_pieces[-1] =~ /(\d*)M/);
137 if (! defined $matches1 || $matches1 eq "") {
138 $matches1 = 1;
139 }
140 if (! defined $matches2 || $matches2 eq "") {
141 $matches2 = 1;
142 }
143 $new_pieces[-1] = $matches1 + $matches2 . "M";
144 } else {
145 push @new_pieces, $piece;
146 }
147 }
148 my $new_cigar_line = join("", @new_pieces);
149 #print(" '$new_cigar_line'\n");
150 return $new_cigar_line;
151 }
152
153
154 ##########################
155 #
156 # getter/setter methods
157 #
158 ##########################
159
160 sub query_member {
161 my ($self,$arg) = @_;
162
163 if (defined($arg)) {
164 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]")
165 unless($arg->isa('Bio::EnsEMBL::Compara::Member'));
166 $self->{'_query_member'} = $arg;
167 }
168 return $self->{'_query_member'};
169 }
170
171 sub query_member_id {
172 my $self = shift;
173 $self->{'_query_member_id'} = shift if (@_);
174 if ($self->{'_query_member_id'}) {
175 return $self->{'_query_member_id'};
176 } elsif ($self->{'_query_member'} and $self->{'_query_member'}->dbID) {
177 return $self->{'_query_member'}->dbID;
178 }
179 return undef;
180 }
181
182 sub query_genome_db_id {
183 my $self = shift;
184 $self->{'_query_genome_db_id'} = shift if (@_);
185 if ($self->{'_query_genome_db_id'}) {
186 return $self->{'_query_genome_db_id'};
187 } elsif ($self->{'_query_member'} and $self->{'_query_member'}->genome_db
188 and $self->{'_query_member'}->genome_db->dbID) {
189 return $self->{'_query_member'}->genome_db->dbID;
190 }
191 return undef;
192 }
193
194 sub hit_member {
195 my ($self,$arg) = @_;
196
197 if (defined($arg)) {
198 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]")
199 unless($arg->isa('Bio::EnsEMBL::Compara::Member'));
200 $self->{'_hit_member'} = $arg;
201 }
202 return $self->{'_hit_member'};
203 }
204
205 sub hit_member_id {
206 my $self = shift;
207 $self->{'_hit_member_id'} = shift if (@_);
208 if ($self->{'_hit_member_id'}) {
209 return $self->{'_hit_member_id'};
210 } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->dbID) {
211 return $self->{'_hit_member'}->dbID;
212 }
213 return undef;
214 }
215
216 sub hit_genome_db_id {
217 my $self = shift;
218 $self->{'_hit_genome_db_id'} = shift if (@_);
219 if ($self->{'_hit_genome_db_id'}) {
220 return $self->{'_hit_genome_db_id'};
221 } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->genome_db
222 and $self->{'_hit_member'}->genome_db->dbID) {
223 return $self->{'_hit_member'}->genome_db->dbID;
224 }
225 return undef;
226 }
227
228 sub qstart {
229 my ($self,$arg) = @_;
230
231 if (defined($arg)) {
232 $self->{_qstart} = $arg;
233 }
234 return $self->{_qstart};
235 }
236
237 sub hstart {
238 my ($self,$arg) = @_;
239
240 if (defined($arg)) {
241 $self->{_hstart} = $arg;
242 }
243 return $self->{_hstart};
244 }
245
246 sub qend {
247 my ($self,$arg) = @_;
248
249 if (defined($arg)) {
250 $self->{_qend} = $arg;
251 }
252 return $self->{_qend};
253 }
254
255 sub qlength {
256 my ($self,$arg) = @_;
257
258 if (defined($arg)) {
259 $self->{_qlength} = $arg;
260 }
261 return $self->{_qlength};
262 }
263
264 sub hend {
265 my ($self,$arg) = @_;
266
267 if (defined($arg)) {
268 $self->{_hend} = $arg;
269 }
270 return $self->{_hend};
271 }
272
273 sub hlength{
274 my ($self,$arg) = @_;
275
276 if (defined($arg)) {
277 $self->{_hlength} = $arg;
278 }
279 return $self->{_hlength};
280 }
281
282 sub score{
283 my ($self,$arg) = @_;
284
285 if (defined($arg)) {
286 $self->{_score} = $arg;
287 }
288 return $self->{_score};
289 }
290
291 sub evalue {
292 my ($self,$arg) = @_;
293
294 if (defined($arg)) {
295 $self->{_evalue} = $arg;
296 }
297 return $self->{_evalue};
298 }
299
300 sub perc_ident {
301 my ($self,$arg) = @_;
302
303 if (defined($arg)) {
304 $self->{_perc_ident} = $arg;
305 }
306 return $self->{_perc_ident};
307 }
308
309 sub perc_pos {
310 my ($self,$arg) = @_;
311
312 if (defined($arg)) {
313 $self->{_perc_pos} = $arg;
314 }
315 return $self->{_perc_pos};
316 }
317
318 sub identical_matches {
319 my ($self,$arg) = @_;
320
321 if (defined($arg)) {
322 $self->{_identical_matches} = $arg;
323 if(defined($self->alignment_length)) {
324 $self->perc_ident(int($arg*100/$self->alignment_length));
325 }
326 }
327 return $self->{_identical_matches};
328 }
329
330 sub positive_matches {
331 my ($self,$arg) = @_;
332
333 if (defined($arg)) {
334 $self->{_positive_matches} = $arg;
335 if(defined($self->alignment_length)) {
336 $self->perc_pos(int($arg*100/$self->alignment_length));
337 }
338 }
339 return $self->{_positive_matches};
340 }
341
342 sub alignment_length {
343 my ($self,$arg) = @_;
344
345 if (defined($arg)) {
346 $self->{_alignment_length} = $arg;
347 }
348 return $self->{_alignment_length};
349 }
350
351 sub cigar_line {
352 my ($self,$arg) = @_;
353
354 if (defined($arg)) {
355 $self->{_cigar_line} = $arg;
356 }
357 return $self->{_cigar_line};
358 }
359
360 sub hit_rank {
361 my ($self,$arg) = @_;
362
363 if (defined($arg)) {
364 $self->{_hit_rank} = $arg;
365 }
366 return $self->{_hit_rank};
367 }
368
369 sub dbID {
370 my ( $self, $dbID ) = @_;
371 $self->{'_dbID'} = $dbID if defined $dbID;
372 return $self->{'_dbID'};
373 }
374
375 sub rhit_dbID {
376 my ( $self, $dbID ) = @_;
377 $self->{'_rhit_dbID'} = $dbID if defined $dbID;
378 return $self->{'_rhit_dbID'};
379 }
380
381 sub display_short {
382 my $self = shift;
383 print($self->get_description(), "\n");
384 }
385
386 sub get_description {
387 my($self) = @_;
388
389 unless(defined($self)) {
390 print("qy_stable_id\t\t\thit_stable_id\t\t\tscore\talen\t\%ident\t\%positive\thit_rank\n");
391 return;
392 }
393
394 my $qm = $self->query_member;
395 my $hm = $self->hit_member;
396 my $dbID = $self->dbID; $dbID = '' unless($dbID);
397
398 my $header = "PAF(".$dbID.")";
399 $header .= "(".$self->rhit_dbID.")" if($self->rhit_dbID);
400 while(length($header)<17) { $header .= ' '; }
401
402 my $qmem = sprintf("%s(%d,%d)(%s:%d)",
403 $qm->stable_id, $self->qstart, $self->qend, $qm->chr_name, $qm->chr_start);
404 my $hmem = sprintf("%s(%d,%d)(%s:%d)",
405 $hm->stable_id, $self->hstart, $self->hend, $hm->chr_name, $hm->chr_start);
406 while(length($qmem)<50) { $qmem .= ' '; }
407 while(length($hmem)<50) { $hmem .= ' '; }
408
409
410 my $desc_string = sprintf("%s%s%s%7.3f%7d%7d%7d%7d",
411 $header, $qmem, $hmem,
412 $self->score,
413 $self->alignment_length,
414 $self->perc_ident,
415 $self->perc_pos,
416 $self->hit_rank);
417
418 return $desc_string;
419 }
420
421
422 =head2 hash_key
423 Args : none
424 Example : $somehash->{$paf->hash_key} = $someValue;
425 Description: used for keeping track of known/stored gene/gene relationships
426 Returntype : string $key
427 Exceptions : none
428 Caller : general
429 =cut
430
431 sub hash_key
432 {
433 my $self = shift;
434 my $key = '1';
435
436 return $key unless($self->query_member);
437 return $key unless($self->hit_member);
438 my $gene1 = $self->query_member->gene_member;
439 my $gene2 = $self->hit_member->gene_member;
440 $gene1 = $self->query_member unless($gene1);
441 $gene2 = $self->hit_member unless($gene2);
442 if($gene1->genome_db_id > $gene2->genome_db_id) {
443 my $temp = $gene1;
444 $gene1 = $gene2;
445 $gene2 = $temp;
446 }
447 $key = $gene1->stable_id . '_' . $gene2->stable_id;
448 return $key;
449 }
450
451 1;