Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Compara/PeptideAlignFeature.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 NAME - Bio::EnsEMBL::Compara::PeptideAlignFeature | |
2 | |
3 =head1 SYNOPSIS | |
4 | |
5 # Get an $homology object somehow | |
6 | |
7 # For Homology PeptideAlignFeatures, you normally get 2 pafs, | |
8 # one for each member used alternatively as query and database | |
9 # (hit) in the blast run | |
10 | |
11 my $pafs = $homology->get_all_PeptideAlignFeature; | |
12 | |
13 foreach my $paf (@{$pafs}) { | |
14 print $paf->query_member->stable_id," ",$self->hit_member->stable_id," ",$paf->evalue,"\n"; | |
15 } | |
16 | |
17 # Other stuff in the object: | |
18 # $paf->qstart | |
19 # $paf->qend | |
20 # $paf->hstart | |
21 # $paf->hend | |
22 # $paf->score | |
23 # $paf->alignment_length | |
24 # $paf->identical_matches | |
25 # $paf->perc_ident | |
26 # $paf->positive_matches | |
27 # $paf->perc_pos | |
28 # $paf->hit_rank | |
29 # $paf->cigar_line | |
30 | |
31 =head1 DESCRIPTION | |
32 | |
33 =head1 CONTACT | |
34 | |
35 Describe contact details here | |
36 | |
37 =head1 APPENDIX | |
38 | |
39 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
40 | |
41 =cut | |
42 | |
43 my $_paf_build_homology_idx = time(); #global index counter | |
44 | |
45 package Bio::EnsEMBL::Compara::PeptideAlignFeature; | |
46 | |
47 use strict; | |
48 use Bio::EnsEMBL::Compara::Homology; | |
49 use Bio::EnsEMBL::Utils::Exception; | |
50 | |
51 #se overload '<=>' => "sort_by_score_evalue_and_pid"; # named method | |
52 | |
53 sub new { | |
54 my ($class) = @_; | |
55 my $self = {}; | |
56 | |
57 bless $self,$class; | |
58 | |
59 $self->query_member(new Bio::EnsEMBL::Compara::Member); | |
60 $self->hit_member(new Bio::EnsEMBL::Compara::Member); | |
61 return $self; | |
62 } | |
63 | |
64 | |
65 sub create_homology | |
66 { | |
67 my $self = shift; | |
68 | |
69 # create an Homology object | |
70 my $homology = new Bio::EnsEMBL::Compara::Homology; | |
71 | |
72 my $stable_id; | |
73 if($self->query_member->taxon_id < $self->hit_member->taxon_id) { | |
74 $stable_id = $self->query_member->taxon_id() . "_" . $self->hit_member->taxon_id . "_"; | |
75 } else { | |
76 $stable_id = $self->hit_member->taxon_id . "_" . $self->query_member->taxon_id . "_"; | |
77 } | |
78 $stable_id .= sprintf ("%011.0d",$_paf_build_homology_idx++); | |
79 $homology->stable_id($stable_id); | |
80 $homology->method_link_type("ENSEMBL_ORTHOLOGUES"); | |
81 | |
82 # | |
83 # QUERY member | |
84 # | |
85 my $qlen = ($self->qend - $self->qstart + 1); | |
86 $self->query_member->perc_cov(int($qlen*100/$self->query_member->seq_length)); | |
87 $self->query_member->perc_id(int($self->identical_matches*100.0/$qlen)); | |
88 $self->query_member->perc_pos(int($self->positive_matches*100/$qlen)); | |
89 | |
90 my $cigar_line = $self->cigar_line; | |
91 #print("original cigar_line '$cigar_line'\n"); | |
92 $cigar_line =~ s/I/M/g; | |
93 $cigar_line = compact_cigar_line($cigar_line); | |
94 $self->query_member->cigar_line($cigar_line); | |
95 #print(" '$cigar_line'\n"); | |
96 | |
97 $homology->add_Member($self->query_member); | |
98 | |
99 # HIT member | |
100 # | |
101 # $self->hstart and $self->hend could be stored | |
102 my $hlen = ($self->hend - $self->hstart + 1); | |
103 $self->hit_member->perc_cov(int($hlen*100/$self->hit_member->seq_length)); | |
104 $self->hit_member->perc_id(int($self->identical_matches*100.0/$hlen)); | |
105 $self->hit_member->perc_pos(int($self->positive_matches*100/$hlen)); | |
106 | |
107 $cigar_line = $self->cigar_line; | |
108 #print("original cigar_line\n '$cigar_line'\n"); | |
109 $cigar_line =~ s/D/M/g; | |
110 $cigar_line =~ s/I/D/g; | |
111 $cigar_line = compact_cigar_line($cigar_line); | |
112 $self->hit_member->cigar_line($cigar_line); | |
113 #print(" '$cigar_line'\n"); | |
114 | |
115 $homology->add_Member($self->hit_member); | |
116 | |
117 return $homology; | |
118 } | |
119 | |
120 | |
121 sub compact_cigar_line | |
122 { | |
123 my $cigar_line = shift; | |
124 | |
125 #print("cigar_line '$cigar_line' => "); | |
126 my @pieces = ( $cigar_line =~ /(\d*[MDI])/g ); | |
127 my @new_pieces = (); | |
128 foreach my $piece (@pieces) { | |
129 $piece =~ s/I/M/; | |
130 if (! scalar @new_pieces || $piece =~ /D/) { | |
131 push @new_pieces, $piece; | |
132 next; | |
133 } | |
134 if ($piece =~ /\d*M/ && $new_pieces[-1] =~ /\d*M/) { | |
135 my ($matches1) = ($piece =~ /(\d*)M/); | |
136 my ($matches2) = ($new_pieces[-1] =~ /(\d*)M/); | |
137 if (! defined $matches1 || $matches1 eq "") { | |
138 $matches1 = 1; | |
139 } | |
140 if (! defined $matches2 || $matches2 eq "") { | |
141 $matches2 = 1; | |
142 } | |
143 $new_pieces[-1] = $matches1 + $matches2 . "M"; | |
144 } else { | |
145 push @new_pieces, $piece; | |
146 } | |
147 } | |
148 my $new_cigar_line = join("", @new_pieces); | |
149 #print(" '$new_cigar_line'\n"); | |
150 return $new_cigar_line; | |
151 } | |
152 | |
153 | |
154 ########################## | |
155 # | |
156 # getter/setter methods | |
157 # | |
158 ########################## | |
159 | |
160 sub query_member { | |
161 my ($self,$arg) = @_; | |
162 | |
163 if (defined($arg)) { | |
164 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]") | |
165 unless($arg->isa('Bio::EnsEMBL::Compara::Member')); | |
166 $self->{'_query_member'} = $arg; | |
167 } | |
168 return $self->{'_query_member'}; | |
169 } | |
170 | |
171 sub query_member_id { | |
172 my $self = shift; | |
173 $self->{'_query_member_id'} = shift if (@_); | |
174 if ($self->{'_query_member_id'}) { | |
175 return $self->{'_query_member_id'}; | |
176 } elsif ($self->{'_query_member'} and $self->{'_query_member'}->dbID) { | |
177 return $self->{'_query_member'}->dbID; | |
178 } | |
179 return undef; | |
180 } | |
181 | |
182 sub query_genome_db_id { | |
183 my $self = shift; | |
184 $self->{'_query_genome_db_id'} = shift if (@_); | |
185 if ($self->{'_query_genome_db_id'}) { | |
186 return $self->{'_query_genome_db_id'}; | |
187 } elsif ($self->{'_query_member'} and $self->{'_query_member'}->genome_db | |
188 and $self->{'_query_member'}->genome_db->dbID) { | |
189 return $self->{'_query_member'}->genome_db->dbID; | |
190 } | |
191 return undef; | |
192 } | |
193 | |
194 sub hit_member { | |
195 my ($self,$arg) = @_; | |
196 | |
197 if (defined($arg)) { | |
198 throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]") | |
199 unless($arg->isa('Bio::EnsEMBL::Compara::Member')); | |
200 $self->{'_hit_member'} = $arg; | |
201 } | |
202 return $self->{'_hit_member'}; | |
203 } | |
204 | |
205 sub hit_member_id { | |
206 my $self = shift; | |
207 $self->{'_hit_member_id'} = shift if (@_); | |
208 if ($self->{'_hit_member_id'}) { | |
209 return $self->{'_hit_member_id'}; | |
210 } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->dbID) { | |
211 return $self->{'_hit_member'}->dbID; | |
212 } | |
213 return undef; | |
214 } | |
215 | |
216 sub hit_genome_db_id { | |
217 my $self = shift; | |
218 $self->{'_hit_genome_db_id'} = shift if (@_); | |
219 if ($self->{'_hit_genome_db_id'}) { | |
220 return $self->{'_hit_genome_db_id'}; | |
221 } elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->genome_db | |
222 and $self->{'_hit_member'}->genome_db->dbID) { | |
223 return $self->{'_hit_member'}->genome_db->dbID; | |
224 } | |
225 return undef; | |
226 } | |
227 | |
228 sub qstart { | |
229 my ($self,$arg) = @_; | |
230 | |
231 if (defined($arg)) { | |
232 $self->{_qstart} = $arg; | |
233 } | |
234 return $self->{_qstart}; | |
235 } | |
236 | |
237 sub hstart { | |
238 my ($self,$arg) = @_; | |
239 | |
240 if (defined($arg)) { | |
241 $self->{_hstart} = $arg; | |
242 } | |
243 return $self->{_hstart}; | |
244 } | |
245 | |
246 sub qend { | |
247 my ($self,$arg) = @_; | |
248 | |
249 if (defined($arg)) { | |
250 $self->{_qend} = $arg; | |
251 } | |
252 return $self->{_qend}; | |
253 } | |
254 | |
255 sub qlength { | |
256 my ($self,$arg) = @_; | |
257 | |
258 if (defined($arg)) { | |
259 $self->{_qlength} = $arg; | |
260 } | |
261 return $self->{_qlength}; | |
262 } | |
263 | |
264 sub hend { | |
265 my ($self,$arg) = @_; | |
266 | |
267 if (defined($arg)) { | |
268 $self->{_hend} = $arg; | |
269 } | |
270 return $self->{_hend}; | |
271 } | |
272 | |
273 sub hlength{ | |
274 my ($self,$arg) = @_; | |
275 | |
276 if (defined($arg)) { | |
277 $self->{_hlength} = $arg; | |
278 } | |
279 return $self->{_hlength}; | |
280 } | |
281 | |
282 sub score{ | |
283 my ($self,$arg) = @_; | |
284 | |
285 if (defined($arg)) { | |
286 $self->{_score} = $arg; | |
287 } | |
288 return $self->{_score}; | |
289 } | |
290 | |
291 sub evalue { | |
292 my ($self,$arg) = @_; | |
293 | |
294 if (defined($arg)) { | |
295 $self->{_evalue} = $arg; | |
296 } | |
297 return $self->{_evalue}; | |
298 } | |
299 | |
300 sub perc_ident { | |
301 my ($self,$arg) = @_; | |
302 | |
303 if (defined($arg)) { | |
304 $self->{_perc_ident} = $arg; | |
305 } | |
306 return $self->{_perc_ident}; | |
307 } | |
308 | |
309 sub perc_pos { | |
310 my ($self,$arg) = @_; | |
311 | |
312 if (defined($arg)) { | |
313 $self->{_perc_pos} = $arg; | |
314 } | |
315 return $self->{_perc_pos}; | |
316 } | |
317 | |
318 sub identical_matches { | |
319 my ($self,$arg) = @_; | |
320 | |
321 if (defined($arg)) { | |
322 $self->{_identical_matches} = $arg; | |
323 if(defined($self->alignment_length)) { | |
324 $self->perc_ident(int($arg*100/$self->alignment_length)); | |
325 } | |
326 } | |
327 return $self->{_identical_matches}; | |
328 } | |
329 | |
330 sub positive_matches { | |
331 my ($self,$arg) = @_; | |
332 | |
333 if (defined($arg)) { | |
334 $self->{_positive_matches} = $arg; | |
335 if(defined($self->alignment_length)) { | |
336 $self->perc_pos(int($arg*100/$self->alignment_length)); | |
337 } | |
338 } | |
339 return $self->{_positive_matches}; | |
340 } | |
341 | |
342 sub alignment_length { | |
343 my ($self,$arg) = @_; | |
344 | |
345 if (defined($arg)) { | |
346 $self->{_alignment_length} = $arg; | |
347 } | |
348 return $self->{_alignment_length}; | |
349 } | |
350 | |
351 sub cigar_line { | |
352 my ($self,$arg) = @_; | |
353 | |
354 if (defined($arg)) { | |
355 $self->{_cigar_line} = $arg; | |
356 } | |
357 return $self->{_cigar_line}; | |
358 } | |
359 | |
360 sub hit_rank { | |
361 my ($self,$arg) = @_; | |
362 | |
363 if (defined($arg)) { | |
364 $self->{_hit_rank} = $arg; | |
365 } | |
366 return $self->{_hit_rank}; | |
367 } | |
368 | |
369 sub dbID { | |
370 my ( $self, $dbID ) = @_; | |
371 $self->{'_dbID'} = $dbID if defined $dbID; | |
372 return $self->{'_dbID'}; | |
373 } | |
374 | |
375 sub rhit_dbID { | |
376 my ( $self, $dbID ) = @_; | |
377 $self->{'_rhit_dbID'} = $dbID if defined $dbID; | |
378 return $self->{'_rhit_dbID'}; | |
379 } | |
380 | |
381 sub display_short { | |
382 my $self = shift; | |
383 print($self->get_description(), "\n"); | |
384 } | |
385 | |
386 sub get_description { | |
387 my($self) = @_; | |
388 | |
389 unless(defined($self)) { | |
390 print("qy_stable_id\t\t\thit_stable_id\t\t\tscore\talen\t\%ident\t\%positive\thit_rank\n"); | |
391 return; | |
392 } | |
393 | |
394 my $qm = $self->query_member; | |
395 my $hm = $self->hit_member; | |
396 my $dbID = $self->dbID; $dbID = '' unless($dbID); | |
397 | |
398 my $header = "PAF(".$dbID.")"; | |
399 $header .= "(".$self->rhit_dbID.")" if($self->rhit_dbID); | |
400 while(length($header)<17) { $header .= ' '; } | |
401 | |
402 my $qmem = sprintf("%s(%d,%d)(%s:%d)", | |
403 $qm->stable_id, $self->qstart, $self->qend, $qm->chr_name, $qm->chr_start); | |
404 my $hmem = sprintf("%s(%d,%d)(%s:%d)", | |
405 $hm->stable_id, $self->hstart, $self->hend, $hm->chr_name, $hm->chr_start); | |
406 while(length($qmem)<50) { $qmem .= ' '; } | |
407 while(length($hmem)<50) { $hmem .= ' '; } | |
408 | |
409 | |
410 my $desc_string = sprintf("%s%s%s%7.3f%7d%7d%7d%7d", | |
411 $header, $qmem, $hmem, | |
412 $self->score, | |
413 $self->alignment_length, | |
414 $self->perc_ident, | |
415 $self->perc_pos, | |
416 $self->hit_rank); | |
417 | |
418 return $desc_string; | |
419 } | |
420 | |
421 | |
422 =head2 hash_key | |
423 Args : none | |
424 Example : $somehash->{$paf->hash_key} = $someValue; | |
425 Description: used for keeping track of known/stored gene/gene relationships | |
426 Returntype : string $key | |
427 Exceptions : none | |
428 Caller : general | |
429 =cut | |
430 | |
431 sub hash_key | |
432 { | |
433 my $self = shift; | |
434 my $key = '1'; | |
435 | |
436 return $key unless($self->query_member); | |
437 return $key unless($self->hit_member); | |
438 my $gene1 = $self->query_member->gene_member; | |
439 my $gene2 = $self->hit_member->gene_member; | |
440 $gene1 = $self->query_member unless($gene1); | |
441 $gene2 = $self->hit_member unless($gene2); | |
442 if($gene1->genome_db_id > $gene2->genome_db_id) { | |
443 my $temp = $gene1; | |
444 $gene1 = $gene2; | |
445 $gene2 = $temp; | |
446 } | |
447 $key = $gene1->stable_id . '_' . $gene2->stable_id; | |
448 return $key; | |
449 } | |
450 | |
451 1; |