Mercurial > repos > willmclaren > ensembl_vep
comparison variant_effect_predictor/Bio/EnsEMBL/Compara/MethodLinkSpeciesSet.pm @ 0:21066c0abaf5 draft
Uploaded
author | willmclaren |
---|---|
date | Fri, 03 Aug 2012 10:04:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:21066c0abaf5 |
---|---|
1 =head1 LICENSE | |
2 | |
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
4 Genome Research Limited. All rights reserved. | |
5 | |
6 This software is distributed under a modified Apache license. | |
7 For license details, please see | |
8 | |
9 http://www.ensembl.org/info/about/code_licence.html | |
10 | |
11 =head1 CONTACT | |
12 | |
13 Please email comments or questions to the public Ensembl | |
14 developers list at <dev@ensembl.org>. | |
15 | |
16 Questions may also be sent to the Ensembl help desk at | |
17 <helpdesk@ensembl.org>. | |
18 | |
19 =head1 NAME | |
20 | |
21 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet - | |
22 Relates every method_link with the species_set for which it has been used | |
23 | |
24 =head1 SYNOPSIS | |
25 | |
26 use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet; | |
27 my $method_link_species_set = Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new( | |
28 -adaptor => $method_link_species_set_adaptor, | |
29 -method => Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ'), | |
30 -species_set_obj => Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3]), | |
31 -max_alignment_length => 10000, | |
32 ); | |
33 | |
34 SET VALUES | |
35 $method_link_species_set->dbID( 12 ); | |
36 $method_link_species_set->adaptor( $mlss_adaptor ); | |
37 $method_link_species_set->method( Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ') ); | |
38 $method_link_species_set->species_set_obj( Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3]) ); | |
39 $method_link_species_set->max_alignment_length( 10000 ); | |
40 | |
41 GET VALUES | |
42 my $mlss_id = $method_link_species_set->dbID(); | |
43 my $mlss_adaptor = $method_link_species_set->adaptor(); | |
44 my $method = $method_link_species_set->method(); | |
45 my $method_link_id = $method_link_species_set->method->dbID(); | |
46 my $method_link_type = $method_link_species_set->method->type(); | |
47 my $species_set = $method_link_species_set->species_set_obj(); | |
48 my $species_set_id = $method_link_species_set->species_set_obj->dbID(); | |
49 my $genome_dbs = $method_link_species_set->species_set_obj->genome_dbs(); | |
50 my $max_alignment_length = $method_link_species_set->max_alignment_length(); | |
51 | |
52 =head1 APPENDIX | |
53 | |
54 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ | |
55 | |
56 =cut | |
57 | |
58 | |
59 | |
60 package Bio::EnsEMBL::Compara::MethodLinkSpeciesSet; | |
61 | |
62 use strict; | |
63 | |
64 use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate); | |
65 use Bio::EnsEMBL::Utils::Argument qw(rearrange); | |
66 use Bio::EnsEMBL::Compara::Method; | |
67 use Bio::EnsEMBL::Compara::SpeciesSet; | |
68 | |
69 use base ( 'Bio::EnsEMBL::Storable', # inherit dbID(), adaptor() and new() methods | |
70 'Bio::EnsEMBL::Compara::Taggable' # inherit everything related to tagability | |
71 ); | |
72 | |
73 my $DEFAULT_MAX_ALIGNMENT = 20000; | |
74 | |
75 | |
76 =head2 new (CONSTRUCTOR) | |
77 | |
78 Arg [-DBID] : (opt.) int $dbID (the database internal ID for this object) | |
79 Arg [-ADAPTOR] : (opt.) Bio::EnsEMBL::Compara::DBSQL::MethodLinkSpeciesSetAdaptor $adaptor | |
80 (the adaptor for connecting to the database) | |
81 Arg [-METHOD] : Bio::EnsEMBL::Compara::Method $method object | |
82 Arg [-SPECIES_SET_OBJ]: Bio::EnsEMBL::Compara::SpeciesSet $species_set object | |
83 Arg [-NAME] : (opt.) string $name (the name for this method_link_species_set) | |
84 Arg [-SOURCE] : (opt.) string $source (the source of these data) | |
85 Arg [-URL] : (opt.) string $url (the original url of these data) | |
86 Arg [-MAX_ALIGNMENT_LENGTH] | |
87 : (opt.) int $max_alignment_length (the length of the largest alignment | |
88 for this MethodLinkSpeciesSet (only used for genomic alignments) | |
89 Example : my $method_link_species_set = Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new( | |
90 -adaptor => $method_link_species_set_adaptor, | |
91 -method => Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ' ), | |
92 -species_set_obj => Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3] ), | |
93 -max_alignment_length => 10000, | |
94 ); | |
95 Description : Creates a new MethodLinkSpeciesSet object | |
96 Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object | |
97 Exceptions : none | |
98 Caller : general | |
99 | |
100 =cut | |
101 | |
# Constructor. The parent constructor handles the Storable arguments; every
# other recognised named argument is then applied through its accessor, in a
# fixed order (method first, then species set, then the plain attributes).
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    my $self = $class->SUPER::new(@args);    # deal with Storable stuff

    my @arg_names = qw(
        METHOD METHOD_LINK_ID METHOD_LINK_TYPE METHOD_LINK_CLASS
        SPECIES_SET_OBJ SPECIES_SET SPECIES_SET_ID
        NAME SOURCE URL MAX_ALIGNMENT_LENGTH
    );
    my %value_of;
    @value_of{@arg_names} = rearrange([@arg_names], @args);

    if ($value_of{METHOD}) {
        $self->method($value_of{METHOD});
    }
    else {
        warning("Please consider using -method to set the method instead of older/deprecated ways to do it");
    }

    # The following three setters are deprecated and report so themselves.
    # Each accessor is named after the lower-cased argument it handles.
    foreach my $setter (qw(method_link_id method_link_type method_link_class)) {
        my $value = $value_of{ uc $setter };
        $self->$setter($value) if defined $value;
    }

    warning("method has not been set in MLSS->new") unless $self->method();

    foreach my $setter (qw(species_set_obj species_set species_set_id)) {
        my $value = $value_of{ uc $setter };
        $self->$setter($value) if defined $value;
    }

    warning("species_set_obj has not been set in MLSS->new") unless $self->species_set_obj();

    foreach my $setter (qw(name source url max_alignment_length)) {
        my $value = $value_of{ uc $setter };
        $self->$setter($value) if defined $value;
    }

    return $self;
}
142 | |
143 | |
=head2 new_fast

  Arg [1]    : hashref $hashref (the attributes of the new object)
  Example    : my $mlss = Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new_fast( { 'name' => 'families' } );
  Description: Blesses the given hash reference into the class without
               running any of the argument handling done by new()
  Returntype : the blessed hash reference

=cut

sub new_fast {
    my ($class, $hashref) = @_;

    return bless($hashref, $class);
}
150 | |
151 | |
=head2 method

  Arg [1]    : (opt.) Bio::EnsEMBL::Compara::Method $method
  Example    : my $method = $mlss->method();
  Example    : $mlss->method( $method );
  Description: getter/setter for the method attribute. Any argument,
               including undef, replaces the stored value.
  Returntype : the stored value (undef if never set)

=cut

sub method {
    my ($self, @new_value) = @_;

    # Test the argument count, not definedness, so that undef can be stored.
    $self->{'method'} = $new_value[0] if @new_value;

    return $self->{'method'};
}
161 | |
162 | |
163 =head2 method_link_id | |
164 | |
165 Arg [1] : (opt.) integer method_link_id | |
166 Example : my $meth_lnk_id = $method_link_species_set->method_link_id(); | |
167 Example : $method_link_species_set->method_link_id(23); | |
168 Description: get/set for attribute method_link_id | |
169 Returntype : integer | |
170 Exceptions : none | |
171 Caller : general | |
172 Status : DEPRECATED, use $mlss->method->dbID instead | |
173 | |
174 =cut | |
175 | |
# Deprecated getter/setter for the dbID of the underlying Method object.
# As a setter it updates the existing Method or creates one holding only the
# dbID. When the dbID is still unknown but the type is known and an adaptor is
# attached, the full Method is fetched by type and replaces the stored one.
# Returns the Method's dbID, or undef (with a warning) if no Method is set.
sub method_link_id {
    my $self = shift @_;

    deprecate("MLSS->method_link_id() is DEPRECATED, please use MLSS->method->dbID()");

    if (@_) {
        if ($self->method) {
            $self->method->dbID( @_ );
        } else {
            $self->method( Bio::EnsEMBL::Compara::Method->new(-dbID => @_) );
        }
    }

    # Without a Method object there is nothing to dereference: warn and give
    # up, like species_set_id() does for a missing SpeciesSet object.
    # "return undef" (not a bare return) keeps one value in list context.
    my $method = $self->method;
    unless ($method) {
        warning("Method object has not been set, so cannot deal with its dbID");
        return undef;
    }

    # type is known => fetch the method from DB and set all of its attributes
    if (!$method->dbID and $self->adaptor and my $type = $method->type) {
        # NOTE(review): the other adaptor getter in this file is spelled
        # get_SpeciesSetAdaptor; confirm getMethodAdaptor exists on the DBAdaptor.
        my $method_adaptor = $self->adaptor->db->getMethodAdaptor;
        if ( my $fetched_method = $method_adaptor->fetch_by_type( $type ) ) {
            $self->method( $fetched_method );
        } else {
            warning("Could not fetch method by type '$type'");
        }
    }

    return $self->method->dbID();
}
201 | |
202 | |
203 =head2 method_link_type | |
204 | |
205 Arg [1] : (opt.) string method_link_type | |
206 Example : my $meth_lnk_type = $method_link_species_set->method_link_type(); | |
207 Example : $method_link_species_set->method_link_type("BLASTZ_NET"); | |
208 Description: get/set for attribute method_link_type | |
209 Returntype : string | |
210 Exceptions : none | |
211 Caller : general | |
212 Status : DEPRECATED, use $mlss->method->type instead | |
213 | |
214 =cut | |
215 | |
# Deprecated getter/setter for the type of the underlying Method object.
# As a setter it updates the existing Method or creates one holding only the
# type. When the type is still unknown but the dbID is known and an adaptor is
# attached, the full Method is fetched by dbID and replaces the stored one.
# Returns the Method's type, or undef (with a warning) if no Method is set.
sub method_link_type {
    my $self = shift @_;

    deprecate("MLSS->method_link_type() is DEPRECATED, please use MLSS->method->type()");

    if (@_) {
        if ($self->method) {
            $self->method->type( @_ );
        } else {
            $self->method( Bio::EnsEMBL::Compara::Method->new(-type => @_) );
        }
    }

    # Without a Method object there is nothing to dereference: warn and give
    # up, like species_set_id() does for a missing SpeciesSet object.
    # "return undef" (not a bare return) keeps one value in list context.
    my $method = $self->method;
    unless ($method) {
        warning("Method object has not been set, so cannot deal with its type");
        return undef;
    }

    # dbID is known => fetch the method from DB and set all of its attributes
    if (!$method->type and $self->adaptor and my $dbID = $method->dbID) {
        # NOTE(review): the other adaptor getter in this file is spelled
        # get_SpeciesSetAdaptor; confirm getMethodAdaptor exists on the DBAdaptor.
        my $method_adaptor = $self->adaptor->db->getMethodAdaptor;
        if ( my $fetched_method = $method_adaptor->fetch_by_dbID( $dbID ) ) {
            $self->method( $fetched_method );
        } else {
            warning("Could not fetch method by dbID '$dbID'");
        }
    }

    return $self->method->type();
}
241 | |
242 | |
243 =head2 method_link_class | |
244 | |
245 Arg [1] : (opt.) string method_link_class | |
246 Example : my $meth_lnk_class = $method_link_species_set->method_link_class(); | |
247 Example : $method_link_species_set->method_link_class("GenomicAlignBlock.multiple_alignment"); | |
248 Description: get/set for attribute method_link_class | |
249 Returntype : string | |
250 Exceptions : none | |
251 Caller : general | |
252 Status : DEPRECATED, use $mlss->method->class instead | |
253 | |
254 =cut | |
255 | |
# Deprecated getter/setter for the class of the underlying Method object.
# As a setter it updates the existing Method or creates one holding only the
# class. When the class is still unknown but the dbID is known and an adaptor
# is attached, the full Method is fetched by dbID and replaces the stored one.
# Returns the Method's class, or undef (with a warning) if no Method is set.
sub method_link_class {
    my $self = shift @_;

    deprecate("MLSS->method_link_class() is DEPRECATED, please use MLSS->method->class()");

    if (@_) {
        if ($self->method) {
            $self->method->class( @_ );
        } else {
            $self->method( Bio::EnsEMBL::Compara::Method->new(-class => @_) );
        }
    }

    # Without a Method object there is nothing to dereference: warn and give
    # up, like species_set_id() does for a missing SpeciesSet object.
    # "return undef" (not a bare return) keeps one value in list context.
    my $method = $self->method;
    unless ($method) {
        warning("Method object has not been set, so cannot deal with its class");
        return undef;
    }

    # dbID is known => fetch the method from DB and set all of its attributes
    if (!$method->class and $self->adaptor and my $dbID = $method->dbID) {
        # NOTE(review): the other adaptor getter in this file is spelled
        # get_SpeciesSetAdaptor; confirm getMethodAdaptor exists on the DBAdaptor.
        my $method_adaptor = $self->adaptor->db->getMethodAdaptor;
        if ( my $fetched_method = $method_adaptor->fetch_by_dbID( $dbID ) ) {
            $self->method( $fetched_method );
        } else {
            warning("Could not fetch method by dbID '$dbID'");
        }
    }

    return $self->method->class();
}
281 | |
282 | |
283 =head2 species_set_obj | |
284 | |
285 Arg [1] : (opt.) Bio::EnsEMBL::Compara::SpeciesSet species_set object | |
286 Example : my $species_set_obj = $mlss->species_set_obj(); | |
287 Example : $mlss->species_set_obj( $species_set_obj ); | |
288 Description: getter/setter for species_set_obj attribute | |
289 Returntype : Bio::EnsEMBL::Compara::SpeciesSet | |
290 Exceptions : none | |
291 Caller : general | |
292 | |
293 =cut | |
294 | |
# Getter/setter for the SpeciesSet object, stored under the 'species_set' key.
# Any argument, including undef, replaces the stored value.
sub species_set_obj {
    my ($self, @replacement) = @_;

    if (scalar @replacement) {
        $self->{'species_set'} = $replacement[0];
    }

    return $self->{'species_set'};
}
304 | |
305 | |
=head2 _set_genome_dbs

  Arg [1]    : listref of Bio::EnsEMBL::Compara::GenomeDB objects
  Description: Internal helper. Validates the GenomeDBs, drops entries whose
               dbID was already seen (with a warning), wraps the remaining
               ones, in their original order, in a new SpeciesSet object and
               stores it with species_set_obj(). When an adaptor is attached,
               the species_set_id is looked up first and passed to the new
               SpeciesSet if found.
  Exceptions : Thrown if an element is undefined or is not a
               Bio::EnsEMBL::Compara::GenomeDB

=cut

sub _set_genome_dbs {
    my ($self, $arg) = @_;

    my %seen_dbID;
    my @unique_genome_dbs;    # first occurrence of each dbID, in input order
    foreach my $gdb (@$arg) {
        throw("undefined value used as a Bio::EnsEMBL::Compara::GenomeDB\n") if (!defined($gdb));

        # UNIVERSAL::isa as a function also copes with unblessed references,
        # on which a ->isa method call would die before reaching this throw.
        throw("$gdb must be a Bio::EnsEMBL::Compara::GenomeDB\n")
            unless UNIVERSAL::isa($gdb, "Bio::EnsEMBL::Compara::GenomeDB");

        if ($seen_dbID{$gdb->dbID}++) {
            warn("GenomeDB (".$gdb->name."; dbID=".$gdb->dbID .") appears twice in this Bio::EnsEMBL::Compara::MethodLinkSpeciesSet\n");
        } else {
            push @unique_genome_dbs, $gdb;
        }
    }
    my $genome_dbs = \@unique_genome_dbs;

    my $species_set_id = $self->adaptor && $self->adaptor->db->get_SpeciesSetAdaptor->find_species_set_id_by_GenomeDBs_mix( $genome_dbs );

    my $ss_obj = Bio::EnsEMBL::Compara::SpeciesSet->new(
        -genome_dbs => $genome_dbs,
        $species_set_id ? (-species_set_id => $species_set_id) : (),
    );
    $self->species_set_obj( $ss_obj );
}
330 | |
331 | |
332 | |
333 =head2 species_set_id | |
334 | |
335 Arg [1] : (opt.) integer species_set_id | |
336 Example : my $species_set_id = $method_link_species_set->species_set_id(); | |
337 Example : $method_link_species_set->species_set_id(23); | |
338 Description: get/set for attribute species_set_id | |
339 Returntype : integer | |
340 Exceptions : none | |
341 Caller : general | |
342 Status : DEPRECATED, use $mlss->species_set_obj->dbID instead | |
343 | |
344 =cut | |
345 | |
# Deprecated getter/setter for the dbID of the SpeciesSet object. All
# arguments are forwarded to that object's dbID() accessor. Warns and returns
# undef when no SpeciesSet object has been set.
sub species_set_id {
    my ($self, @args) = @_;

    deprecate("MLSS->species_set_id() is DEPRECATED, please use MLSS->species_set_obj->dbID()");

    my $species_set_obj = $self->species_set_obj;

    unless ($species_set_obj) {
        warning("SpeciesSet object has not been set, so cannot deal with its dbID");
        return undef;
    }

    return $species_set_obj->dbID( @args );
}
358 | |
359 | |
360 =head2 species_set | |
361 | |
362 Arg [1] : (opt.) listref of Bio::EnsEMBL::Compara::GenomeDB objects | |
363 Example : my $meth_lnk_species_set = $method_link_species_set->species_set(); | |
364 Example : $method_link_species_set->species_set([$gdb1, $gdb2, $gdb3]); | |
365 Description: get/set for attribute species_set | |
366 Returntype : listref of Bio::EnsEMBL::Compara::GenomeDB objects | |
367 Exceptions : Thrown if any argument is not a Bio::EnsEMBL::Compara::GenomeDB | |
368 object; a warning is issued if a GenomeDB entry appears several times | |
369 Caller : general | |
370 Status : DEPRECATED, use $mlss->species_set_obj->genome_dbs instead | |
371 | |
372 =cut | |
373 | |
# Deprecated getter/setter kept for compatibility until everyone has switched
# to species_set_obj(). Accepts either a SpeciesSet object, which is stored
# as is, or a non-empty listref of GenomeDB objects, which is wrapped in a new
# SpeciesSet. Any other true argument is fatal.
# Returns the genome_dbs of the stored SpeciesSet object, or undef (with a
# warning) if none has been set.
sub species_set {
    my ($self, $arg) = @_;

    deprecate("MLSS->species_set() is DEPRECATED, please use MLSS->species_set_obj->genome_dbs()");

    if ($arg) {
        if (UNIVERSAL::isa($arg, 'Bio::EnsEMBL::Compara::SpeciesSet')) {

            $self->species_set_obj( $arg );

        } elsif ((ref($arg) eq 'ARRAY') and @$arg) {

            $self->_set_genome_dbs( $arg );

        } else {
            die "Wrong type of argument to $self->species_set()";
        }
    }

    # Without a SpeciesSet object there is nothing to dereference: warn and
    # give up, like species_set_id() does.
    # "return undef" (not a bare return) keeps one value in list context.
    my $species_set_obj = $self->species_set_obj;
    unless ($species_set_obj) {
        warning("SpeciesSet object has not been set, so cannot return its GenomeDBs");
        return undef;
    }

    return $species_set_obj->genome_dbs;
}
394 | |
395 | |
396 =head2 name | |
397 | |
398 Arg [1] : (opt.) string $name | |
399 Example : my $name = $method_link_species_set->name(); | |
400 Example : $method_link_species_set->name("families"); | |
401 Description: get/set for attribute name | |
402 Returntype : string | |
403 Exceptions : none | |
404 Caller : general | |
405 | |
406 =cut | |
407 | |
# Getter/setter for the name attribute. Only a defined argument replaces the
# stored value, so the name cannot be reset to undef through this accessor.
sub name {
    my $self     = shift;
    my $new_name = shift;

    $self->{'name'} = $new_name if defined $new_name;

    return $self->{'name'};
}
417 | |
418 | |
419 =head2 source | |
420 | |
421 Arg [1] : (opt.) string $source | |
422 Example : my $source = $method_link_species_set->source(); | |
423 Example : $method_link_species_set->source("ensembl"); | |
424 Description: get/set for attribute source. The source refers to who | |
425 generated the data in a first instance (ensembl, ucsc...) | |
426 Returntype : string | |
427 Exceptions : none | |
428 Caller : general | |
429 | |
430 =cut | |
431 | |
# Getter/setter for the source attribute. Only a defined argument replaces
# the stored value.
sub source {
    my $self       = shift;
    my $new_source = shift;

    $self->{'source'} = $new_source if defined $new_source;

    return $self->{'source'};
}
441 | |
442 | |
443 =head2 url | |
444 | |
445 Arg [1] : (opt.) string $url | |
446 Example : my $url = $method_link_species_set->url(); | |
447 Example : $method_link_species_set->url("http://hgdownload.cse.ucsc.edu/goldenPath/monDom1/vsHg17/"); | |
448 Description: get/set for attribute url. Defines where the data come from if they | |
449 have been imported | |
450 Returntype : string | |
451 Exceptions : none | |
452 Caller : general | |
453 | |
454 =cut | |
455 | |
# Getter/setter for the url attribute. Only a defined argument replaces the
# stored value.
sub url {
    my $self    = shift;
    my $new_url = shift;

    $self->{'url'} = $new_url if defined $new_url;

    return $self->{'url'};
}
465 | |
466 | |
467 =head2 get_common_classification | |
468 | |
469 Arg [1] : -none- | |
470 Example : my $common_classification = $method_link_species_set-> | |
471 get_common_classification(); | |
472 Description: This method fetches the taxonomic classifications for all the | |
473 species included in this | |
474 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object and | |
475 returns the common part of them. | |
476 Returntype : listref of strings (undef if the species set is empty) | |
477 Exceptions : | |
478 Caller : general | |
479 | |
480 =cut | |
481 | |
# Returns a listref with the classification terms shared by every GenomeDB of
# this object's species set, or undef when the species set is empty.
# Each classification string is split on whitespace. The running result is
# intersected with each further GenomeDB in turn, one occurrence per match,
# so the result follows the term order of the last GenomeDB processed.
sub get_common_classification {
    my ($self) = @_;
    my $common_classification;

    # Read the SpeciesSet object directly: the species_set() wrapper is
    # deprecated and would emit a deprecation message on every call.
    my $genome_dbs = $self->species_set_obj->genome_dbs;

    foreach my $this_genome_db (@$genome_dbs) {
        my @classification = split(" ", $this_genome_db->taxon->classification);

        # The first GenomeDB seeds the result with all of its terms.
        if (!defined($common_classification)) {
            $common_classification = [ @classification ];
            next;
        }

        my @still_common;
        TERM:
        foreach my $term (@classification) {
            foreach my $j (0 .. $#$common_classification) {
                if ($term eq $common_classification->[$j]) {
                    # Remove the matched entry so that it cannot be matched
                    # again, then move on to the next term straight away.
                    push @still_common, splice(@$common_classification, $j, 1);
                    next TERM;
                }
            }
        }
        $common_classification = \@still_common;
    }

    return $common_classification;
}
508 | |
509 | |
510 =head2 max_alignment_length | |
511 | |
512 Arg [1] : (opt.) int $max_alignment_length | |
513 Example : my $max_alignment_length = $method_link_species_set-> | |
514 max_alignment_length(); | |
515 Example : $method_link_species_set->max_alignment_length(1000); | |
516 Description: get/set for attribute max_alignment_length | |
517 Returntype : integer | |
518 Exceptions : | |
519 Caller : general | |
520 | |
521 =cut | |
522 | |
# Getter/setter for the maximum alignment length, kept in the 'max_align' tag.
# Any argument is stored with add_tag(). A false stored value (unset, 0 or
# empty) makes the getter fall back to $DEFAULT_MAX_ALIGNMENT.
sub max_alignment_length {
    my ($self, @new_value) = @_;

    $self->add_tag('max_align', $new_value[0]) if @new_value;

    my $stored_length = $self->get_value_for_tag('max_align');

    return $stored_length ? $stored_length : $DEFAULT_MAX_ALIGNMENT;
}
532 | |
533 | |
534 =head2 toString | |
535 | |
536 Args : (none) | |
537 Example : print $mlss->toString()."\n"; | |
538 Description: returns a stringified representation of the method_link_species_set | |
539 Returntype : string | |
540 | |
541 =cut | |
542 | |
# Builds a one-line description of this object: class name, dbID, name,
# source, url and max_alignment_length, followed by the stringified Method
# and SpeciesSet objects. A false dbID or max_alignment_length shows as '?'.
sub toString {
    my ($self) = @_;

    # Each accessor is called once, in scalar context, in output order.
    my $dbID             = $self->dbID || '?';
    my $name             = $self->name;
    my $source           = $self->source;
    my $url              = $self->url;
    my $max_align_length = $self->max_alignment_length || '?';
    my $method_string    = $self->method->toString;
    my $species_string   = $self->species_set_obj->toString;

    return ref($self)
        . ": dbID=$dbID, name='$name', source='$source', url='$url'"
        . ", max_alignment_length=$max_align_length"
        . ", {$method_string} x {$species_string}";
}
553 | |
554 | |
555 1; |