0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet -
|
|
22 Relates every method_link with the species_set for which it has been used
|
|
23
|
|
24 =head1 SYNOPSIS
|
|
25
|
|
26 use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
|
|
27 my $method_link_species_set = Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new(
|
|
28 -adaptor => $method_link_species_set_adaptor,
|
|
29 -method => Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ'),
|
|
30 -species_set_obj => Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3]),
|
|
31 -max_alignment_length => 10000,
|
|
32 );
|
|
33
|
|
34 SET VALUES
|
|
35 $method_link_species_set->dbID( 12 );
|
|
36 $method_link_species_set->adaptor( $mlss_adaptor );
|
|
37 $method_link_species_set->method( Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ') );
|
|
38 $method_link_species_set->species_set_obj( Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3]) );
|
|
39 $method_link_species_set->max_alignment_length( 10000 );
|
|
40
|
|
41 GET VALUES
|
|
42 my $mlss_id = $method_link_species_set->dbID();
|
|
43 my $mlss_adaptor = $method_link_species_set->adaptor();
|
|
44 my $method = $method_link_species_set->method();
|
|
45 my $method_link_id = $method_link_species_set->method->dbID();
|
|
46 my $method_link_type = $method_link_species_set->method->type();
|
|
47 my $species_set = $method_link_species_set->species_set_obj();
|
|
48 my $species_set_id = $method_link_species_set->species_set_obj->dbID();
|
|
49 my $genome_dbs = $method_link_species_set->species_set_obj->genome_dbs();
|
|
50 my $max_alignment_length = $method_link_species_set->max_alignment_length();
|
|
51
|
|
52 =head1 APPENDIX
|
|
53
|
|
54 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
|
|
55
|
|
56 =cut
|
|
57
|
|
58
|
|
59
|
|
package Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate);
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Compara::Method;
use Bio::EnsEMBL::Compara::SpeciesSet;

use base (  'Bio::EnsEMBL::Storable',           # inherit dbID(), adaptor() and new() methods
            'Bio::EnsEMBL::Compara::Taggable'   # inherit everything related to tagability
         );

# Value returned by max_alignment_length() when no 'max_align' tag has been set.
my $DEFAULT_MAX_ALIGNMENT = 20000;
74
|
|
75
|
|
76 =head2 new (CONSTRUCTOR)
|
|
77
|
|
78 Arg [-DBID] : (opt.) int $dbID (the database internal ID for this object)
|
|
79 Arg [-ADAPTOR] : (opt.) Bio::EnsEMBL::Compara::DBSQL::MethodLinkSpeciesSetAdaptor $adaptor
|
|
80 (the adaptor for connecting to the database)
|
|
81 Arg [-METHOD] : Bio::EnsEMBL::Compara::Method $method object
|
|
82 Arg [-SPECIES_SET_OBJ]: Bio::EnsEMBL::Compara::SpeciesSet $species_set object
|
|
83 Arg [-NAME] : (opt.) string $name (the name for this method_link_species_set)
|
|
84 Arg [-SOURCE] : (opt.) string $source (the source of these data)
|
|
85 Arg [-URL] : (opt.) string $url (the original url of these data)
|
|
86 Arg [-MAX_ALIGNMENT_LENGTH]
|
|
87 : (opt.) int $max_alignment_length (the length of the largest alignment
|
|
88 for this MethodLinkSpeciesSet (only used for genomic alignments)
|
|
89 Example : my $method_link_species_set = Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new(
|
|
90 -adaptor => $method_link_species_set_adaptor,
|
|
91 -method => Bio::EnsEMBL::Compara::Method->new( -type => 'MULTIZ' ),
|
|
92 -species_set_obj => Bio::EnsEMBL::Compara::SpeciesSet->new( -genome_dbs => [$gdb1, $gdb2, $gdb3] ),
|
|
93 -max_alignment_length => 10000,
|
|
94 );
|
|
95 Description : Creates a new MethodLinkSpeciesSet object
|
|
96 Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
|
|
97 Exceptions : none
|
|
98 Caller : general
|
|
99
|
|
100 =cut
|
|
101
|
|
# Constructor. Delegates to the parent constructor first, then applies the
# named arguments: the method (preferably via -method), the species set, and
# finally the plain attributes. Warnings are issued when -method is not used,
# and when no method or no species set ends up being set.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    my $self = $class->SUPER::new(@args);   # deal with Storable stuff

    my @known_args = qw(
        METHOD METHOD_LINK_ID METHOD_LINK_TYPE METHOD_LINK_CLASS
        SPECIES_SET_OBJ SPECIES_SET SPECIES_SET_ID
        NAME SOURCE URL MAX_ALIGNMENT_LENGTH
    );
    my ($method, $method_link_id, $method_link_type, $method_link_class,
        $species_set_obj, $species_set, $species_set_id,
        $name, $source, $url, $max_alignment_length) = rearrange(\@known_args, @args);

    if (!$method) {
        warning("Please consider using -method to set the method instead of older/deprecated ways to do it");
    } else {
        $self->method($method);
    }

    # Older ways of describing the method; each of these setters reports its own deprecation.
    foreach my $legacy_pair (
        [ 'method_link_id',    $method_link_id    ],
        [ 'method_link_type',  $method_link_type  ],
        [ 'method_link_class', $method_link_class ],
    ) {
        my ($setter, $value) = @{$legacy_pair};
        $self->$setter($value) if defined($value);
    }

    unless ($self->method()) {
        warning("method has not been set in MLSS->new");
    }

    # Species set: the object form first, then the two older forms, in this order.
    foreach my $species_set_pair (
        [ 'species_set_obj', $species_set_obj ],
        [ 'species_set',     $species_set     ],
        [ 'species_set_id',  $species_set_id  ],
    ) {
        my ($setter, $value) = @{$species_set_pair};
        $self->$setter($value) if defined($value);
    }

    unless ($self->species_set_obj()) {
        warning("species_set_obj has not been set in MLSS->new");
    }

    # Remaining attributes are only touched when a value was supplied.
    foreach my $attribute_pair (
        [ 'name',                 $name                 ],
        [ 'source',               $source               ],
        [ 'url',                  $url                  ],
        [ 'max_alignment_length', $max_alignment_length ],
    ) {
        my ($setter, $value) = @{$attribute_pair};
        $self->$setter($value) if defined($value);
    }

    return $self;
}
142
|
|
143
|
|
# Lightweight constructor: blesses the given hash reference into the class.
# The hash is neither copied nor validated; the caller's reference itself
# becomes the object.
sub new_fast {
    my ($class, $hashref) = @_;

    return bless($hashref, $class);
}
150
|
|
151
|
|
# Getter/setter for the method object stored under the 'method' key.
# Any supplied argument (including an explicit undef) replaces the stored
# value; the current value is always returned.
sub method {
    my ($self, @new_value) = @_;

    $self->{'method'} = $new_value[0] if @new_value;

    return $self->{'method'};
}
161
|
|
162
|
|
163 =head2 method_link_id
|
|
164
|
|
165 Arg [1] : (opt.) integer method_link_id
|
|
166 Example : my $meth_lnk_id = $method_link_species_set->method_link_id();
|
|
167 Example : $method_link_species_set->method_link_id(23);
|
|
168 Description: get/set for attribute method_link_id
|
|
169 Returntype : integer
|
|
170 Exceptions : none
|
|
171 Caller : general
|
|
172 Status : DEPRECATED, use $mlss->method->dbID instead
|
|
173
|
|
174 =cut
|
|
175
|
|
# DEPRECATED getter/setter for the dbID of the underlying method object.
#   Arg [1]    : (opt.) integer method_link_id
#   Returntype : integer, or undef (with a warning) when no method has been set
# When a value is given it is stored on the existing method object, or a new
# Bio::EnsEMBL::Compara::Method is created to hold it. When the dbID is
# missing but the type and an adaptor are available, the method is re-fetched
# by type so that all of its attributes get populated.
sub method_link_id {
    my $self = shift @_;

    deprecate("MLSS->method_link_id() is DEPRECATED, please use MLSS->method->dbID()");

    if(@_) {
        my $method_link_id = shift @_;  # only the first argument is meaningful
        if($self->method) {
            $self->method->dbID( $method_link_id );
        } else {
            $self->method( Bio::EnsEMBL::Compara::Method->new(-dbID => $method_link_id) );
        }
    }

    # Reading the attribute of a method that was never set used to die on an
    # undefined value; report it the same way species_set_id() does instead.
    my $method = $self->method;
    unless($method) {
        warning("Method object has not been set, so cannot deal with its dbID");
        return undef;
    }

    # type is known => fetch the method from DB and set all of its attributes
    if (!$method->dbID and $self->adaptor and my $type = $method->type) {
        # same get_<Name>Adaptor naming as the get_SpeciesSetAdaptor lookup in this module
        my $method_adaptor = $self->adaptor->db->get_MethodAdaptor;
        if( my $fetched_method = $method_adaptor->fetch_by_type( $type ) ) {
            $self->method( $fetched_method );
        } else {
            warning("Could not fetch method by type '$type'");
        }
    }

    return $self->method->dbID();
}
201
|
|
202
|
|
203 =head2 method_link_type
|
|
204
|
|
205 Arg [1] : (opt.) string method_link_type
|
|
206 Example : my $meth_lnk_type = $method_link_species_set->method_link_type();
|
|
207 Example : $method_link_species_set->method_link_type("BLASTZ_NET");
|
|
208 Description: get/set for attribute method_link_type
|
|
209 Returntype : string
|
|
210 Exceptions : none
|
|
211 Caller : general
|
|
212 Status : DEPRECATED, use $mlss->method->type instead
|
|
213
|
|
214 =cut
|
|
215
|
|
# DEPRECATED getter/setter for the type of the underlying method object.
#   Arg [1]    : (opt.) string method_link_type
#   Returntype : string, or undef (with a warning) when no method has been set
# When a value is given it is stored on the existing method object, or a new
# Bio::EnsEMBL::Compara::Method is created to hold it. When the type is
# missing but the dbID and an adaptor are available, the method is re-fetched
# by dbID so that all of its attributes get populated.
sub method_link_type {
    my $self = shift @_;

    deprecate("MLSS->method_link_type() is DEPRECATED, please use MLSS->method->type()");

    if(@_) {
        my $method_link_type = shift @_;  # only the first argument is meaningful
        if($self->method) {
            $self->method->type( $method_link_type );
        } else {
            $self->method( Bio::EnsEMBL::Compara::Method->new(-type => $method_link_type) );
        }
    }

    # Reading the attribute of a method that was never set used to die on an
    # undefined value; report it the same way species_set_id() does instead.
    my $method = $self->method;
    unless($method) {
        warning("Method object has not been set, so cannot deal with its type");
        return undef;
    }

    # dbID is known => fetch the method from DB and set all of its attributes
    if (!$method->type and $self->adaptor and my $dbID = $method->dbID) {
        # same get_<Name>Adaptor naming as the get_SpeciesSetAdaptor lookup in this module
        my $method_adaptor = $self->adaptor->db->get_MethodAdaptor;
        if( my $fetched_method = $method_adaptor->fetch_by_dbID( $dbID ) ) {
            $self->method( $fetched_method );
        } else {
            warning("Could not fetch method by dbID '$dbID'");
        }
    }

    return $self->method->type();
}
241
|
|
242
|
|
243 =head2 method_link_class
|
|
244
|
|
245 Arg [1] : (opt.) string method_link_class
|
|
246 Example : my $meth_lnk_class = $method_link_species_set->method_link_class();
|
|
247 Example : $method_link_species_set->method_link_class("GenomicAlignBlock.multiple_alignment");
|
|
248 Description: get/set for attribute method_link_class
|
|
249 Returntype : string
|
|
250 Exceptions : none
|
|
251 Caller : general
|
|
252 Status : DEPRECATED, use $mlss->method->class instead
|
|
253
|
|
254 =cut
|
|
255
|
|
# DEPRECATED getter/setter for the class of the underlying method object.
#   Arg [1]    : (opt.) string method_link_class
#   Returntype : string, or undef (with a warning) when no method has been set
# When a value is given it is stored on the existing method object, or a new
# Bio::EnsEMBL::Compara::Method is created to hold it. When the class is
# missing but the dbID and an adaptor are available, the method is re-fetched
# by dbID so that all of its attributes get populated.
sub method_link_class {
    my $self = shift @_;

    deprecate("MLSS->method_link_class() is DEPRECATED, please use MLSS->method->class()");

    if(@_) {
        my $method_link_class = shift @_;  # only the first argument is meaningful
        if($self->method) {
            $self->method->class( $method_link_class );
        } else {
            $self->method( Bio::EnsEMBL::Compara::Method->new(-class => $method_link_class) );
        }
    }

    # Reading the attribute of a method that was never set used to die on an
    # undefined value; report it the same way species_set_id() does instead.
    my $method = $self->method;
    unless($method) {
        warning("Method object has not been set, so cannot deal with its class");
        return undef;
    }

    # dbID is known => fetch the method from DB and set all of its attributes
    if (!$method->class and $self->adaptor and my $dbID = $method->dbID) {
        # same get_<Name>Adaptor naming as the get_SpeciesSetAdaptor lookup in this module
        my $method_adaptor = $self->adaptor->db->get_MethodAdaptor;
        if( my $fetched_method = $method_adaptor->fetch_by_dbID( $dbID ) ) {
            $self->method( $fetched_method );
        } else {
            warning("Could not fetch method by dbID '$dbID'");
        }
    }

    return $self->method->class();
}
281
|
|
282
|
|
283 =head2 species_set_obj
|
|
284
|
|
285 Arg [1] : (opt.) Bio::EnsEMBL::Compara::SpeciesSet species_set object
|
|
286 Example : my $species_set_obj = $mlss->species_set_obj();
|
|
287 Example : $mlss->species_set_obj( $species_set_obj );
|
|
288 Description: getter/setter for species_set_obj attribute
|
|
289 Returntype : Bio::EnsEMBL::Compara::SpeciesSet
|
|
290 Exceptions : none
|
|
291 Caller : general
|
|
292
|
|
293 =cut
|
|
294
|
|
# Getter/setter for the species set object, kept under the 'species_set' key.
# Any supplied argument (including an explicit undef) replaces the stored
# value; the current value is always returned.
sub species_set_obj {
    my ($self, @new_value) = @_;

    $self->{'species_set'} = $new_value[0] if @new_value;

    return $self->{'species_set'};
}
304
|
|
305
|
|
# Internal helper: builds a SpeciesSet from a list of GenomeDB objects and
# stores it via species_set_obj().
#   Arg [1]    : listref of Bio::EnsEMBL::Compara::GenomeDB objects
#   Returntype : the value returned by species_set_obj() for the new SpeciesSet
#   Exceptions : throws on an undefined entry or on anything that is not a
#                Bio::EnsEMBL::Compara::GenomeDB; a repeated dbID only
#                triggers a warning and the repeat is dropped
sub _set_genome_dbs {
    my ($self, $arg) = @_;

    my %genome_db_hash = ();
    my @unique_genome_dbs;      # first occurrences, in the order they were given
    foreach my $gdb (@$arg) {
        throw("undefined value used as a Bio::EnsEMBL::Compara::GenomeDB\n") if (!defined($gdb));
        # The function form also copes with unblessed references, which would make
        # $gdb->isa() die with a raw Perl error instead of the message below.
        throw("$gdb must be a Bio::EnsEMBL::Compara::GenomeDB\n") unless UNIVERSAL::isa($gdb, "Bio::EnsEMBL::Compara::GenomeDB");

        # NOTE(review): GenomeDBs without a dbID all share the same hash key, so
        # only the first one is kept -- confirm that callers always pass stored objects.
        my $gdb_id = $gdb->dbID;
        if(defined $genome_db_hash{$gdb_id}) {
            warn("GenomeDB (".$gdb->name."; dbID=".$gdb_id .") appears twice in this Bio::EnsEMBL::Compara::MethodLinkSpeciesSet\n");
        } else {
            $genome_db_hash{$gdb_id} = $gdb;
            push @unique_genome_dbs, $gdb;
        }
    }
    # Keep the caller's order instead of the arbitrary order of "values %hash".
    my $genome_dbs = \@unique_genome_dbs;

    # Reuse the dbID of a matching species set when an adaptor is attached.
    my $species_set_id = $self->adaptor && $self->adaptor->db->get_SpeciesSetAdaptor->find_species_set_id_by_GenomeDBs_mix( $genome_dbs );

    my $ss_obj = Bio::EnsEMBL::Compara::SpeciesSet->new(
        -genome_dbs => $genome_dbs,
        $species_set_id ? (-species_set_id => $species_set_id) : (),
    );
    return $self->species_set_obj( $ss_obj );
}
330
|
|
331
|
|
332
|
|
333 =head2 species_set_id
|
|
334
|
|
335 Arg [1] : (opt.) integer species_set_id
|
|
336 Example : my $species_set_id = $method_link_species_set->species_set_id();
|
|
337 Example : $method_link_species_set->species_set_id(23);
|
|
338 Description: get/set for attribute species_set_id
|
|
339 Returntype : integer
|
|
340 Exceptions : none
|
|
341 Caller : general
|
|
342 Status : DEPRECATED, use $mlss->species_set_obj->dbID instead
|
|
343
|
|
344 =cut
|
|
345
|
|
# DEPRECATED getter/setter for the dbID of the species set object.
# All arguments are forwarded to the species set's dbID(). When no species
# set has been set, a warning is issued and undef is returned.
sub species_set_id {
    my ($self, @args) = @_;

    deprecate("MLSS->species_set_id() is DEPRECATED, please use MLSS->species_set_obj->dbID()");

    my $species_set_obj = $self->species_set_obj;
    unless ($species_set_obj) {
        warning("SpeciesSet object has not been set, so cannot deal with its dbID");
        return undef;
    }

    return $species_set_obj->dbID(@args);
}
358
|
|
359
|
|
360 =head2 species_set
|
|
361
|
|
362 Arg [1] : (opt.) listref of Bio::EnsEMBL::Compara::GenomeDB objects
|
|
363 Example : my $meth_lnk_species_set = $method_link_species_set->species_set();
|
|
364 Example : $method_link_species_set->species_set([$gdb1, $gdb2, $gdb3]);
|
|
365 Description: get/set for attribute species_set
|
|
366 Returntype : listref of Bio::EnsEMBL::Compara::GenomeDB objects
|
|
367 Exceptions : Thrown if any argument is not a Bio::EnsEMBL::Compara::GenomeDB
|
|
368 object (a GenomeDB entry appearing several times only triggers a warning)
|
|
369 Caller : general
|
|
370 Status : DEPRECATED, use $mlss->species_set_obj->genome_dbs instead
|
|
371
|
|
372 =cut
|
|
373
|
|
# DEPRECATED getter/setter for the list of GenomeDBs.
#   Arg [1]    : (opt.) a Bio::EnsEMBL::Compara::SpeciesSet object, or a
#                non-empty listref of Bio::EnsEMBL::Compara::GenomeDB objects
#   Returntype : whatever genome_dbs() of the species set object returns, or
#                undef (with a warning) when no species set has been set
#   Exceptions : dies on any other kind of true argument
sub species_set {
    my ($self, $arg) = @_;

    deprecate("MLSS->species_set() is DEPRECATED, please use MLSS->species_set_obj->genome_dbs()");

    if($arg) {
        if(UNIVERSAL::isa($arg, 'Bio::EnsEMBL::Compara::SpeciesSet')) {

            $self->species_set_obj( $arg );

        } elsif((ref($arg) eq 'ARRAY') and @$arg) {

            $self->_set_genome_dbs( $arg );

        } else {
            die "Wrong type of argument to $self->species_set()";
        }
    }

    # Reading from an object without a species set used to die on an undefined
    # value; behave like species_set_id() and report the problem instead.
    my $species_set_obj = $self->species_set_obj;
    unless($species_set_obj) {
        warning("SpeciesSet object has not been set, so cannot return its GenomeDBs");
        return undef;
    }

    return $species_set_obj->genome_dbs;    # for compatibility, we shall keep this method until everyone has switched to using species_set_obj()
}
394
|
|
395
|
|
396 =head2 name
|
|
397
|
|
398 Arg [1] : (opt.) string $name
|
|
399 Example : my $name = $method_link_species_set->name();
|
|
400 Example : $method_link_species_set->name("families");
|
|
401 Description: get/set for attribute name
|
|
402 Returntype : string
|
|
403 Exceptions : none
|
|
404 Caller : general
|
|
405
|
|
406 =cut
|
|
407
|
|
# Getter/setter for the 'name' attribute. Only a defined argument updates the
# stored value, so passing undef cannot clear it.
sub name {
    my $self     = shift;
    my $new_name = shift;

    $self->{'name'} = $new_name if defined($new_name);

    return $self->{'name'};
}
417
|
|
418
|
|
419 =head2 source
|
|
420
|
|
421 Arg [1] : (opt.) string $source
|
|
422 Example : my $source = $method_link_species_set->source();
|
|
423 Example : $method_link_species_set->source("ensembl");
|
|
424 Description: get/set for attribute source. The source refers to who
|
|
425 generated the data in a first instance (ensembl, ucsc...)
|
|
426 Returntype : string
|
|
427 Exceptions : none
|
|
428 Caller : general
|
|
429
|
|
430 =cut
|
|
431
|
|
# Getter/setter for the 'source' attribute. Only a defined argument updates
# the stored value, so passing undef cannot clear it.
sub source {
    my $self       = shift;
    my $new_source = shift;

    $self->{'source'} = $new_source if defined($new_source);

    return $self->{'source'};
}
441
|
|
442
|
|
443 =head2 url
|
|
444
|
|
445 Arg [1] : (opt.) string $url
|
|
446 Example : my $url = $method_link_species_set->url();
|
|
447 Example : $method_link_species_set->url("http://hgdownload.cse.ucsc.edu/goldenPath/monDom1/vsHg17/");
|
|
448 Description: get/set for attribute url. Defines where the data come from if they
|
|
449 have been imported
|
|
450 Returntype : string
|
|
451 Exceptions : none
|
|
452 Caller : general
|
|
453
|
|
454 =cut
|
|
455
|
|
# Getter/setter for the 'url' attribute. Only a defined argument updates the
# stored value, so passing undef cannot clear it.
sub url {
    my $self    = shift;
    my $new_url = shift;

    $self->{'url'} = $new_url if defined($new_url);

    return $self->{'url'};
}
465
|
|
466
|
|
467 =head2 get_common_classification
|
|
468
|
|
469 Arg [1] : -none-
|
|
470 Example : my $common_classification = $method_link_species_set->
|
|
471 get_common_classification();
|
|
472 Description: This method fetches the taxonomic classifications for all the
|
|
473 species included in this
|
|
474 Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object and
|
|
475 returns the common part of them.
|
|
476 Returntype : array of strings
|
|
477 Exceptions :
|
|
478 Caller : general
|
|
479
|
|
480 =cut
|
|
481
|
|
# Returns the classification terms shared by every GenomeDB of the species set.
#   Returntype : arrayref of strings, or undef when there is no species set or
#                it holds no GenomeDB
# Each GenomeDB's taxon classification string is split on whitespace. The
# result is narrowed one GenomeDB at a time: a term is kept only if it is still
# available in the running result (each occurrence can be matched once), and
# the kept terms follow the order of the most recently processed GenomeDB.
sub get_common_classification {
    my ($self) = @_;
    my $common_classification;

    # Read the GenomeDBs straight from the species set object: going through the
    # deprecated species_set() would emit a deprecation message on every call.
    my $species_set_obj = $self->species_set_obj;
    my $genome_dbs = $species_set_obj ? $species_set_obj->genome_dbs : undef;

    foreach my $this_genome_db (@{ $genome_dbs || [] }) {
        my @classification = split(" ", $this_genome_db->taxon->classification);

        if (!defined($common_classification)) {
            # The first GenomeDB seeds the result with its full classification.
            $common_classification = [ @classification ];
            next;
        }

        # Count what is still available, then consume one occurrence per match.
        my %available;
        $available{$_}++ foreach @$common_classification;

        my @still_common;
        foreach my $term (@classification) {
            next unless $available{$term};
            $available{$term}--;
            push @still_common, $term;
        }
        $common_classification = \@still_common;
    }

    return $common_classification;
}
508
|
|
509
|
|
510 =head2 max_alignment_length
|
|
511
|
|
512 Arg [1] : (opt.) int $max_alignment_length
|
|
513 Example : my $max_alignment_length = $method_link_species_set->
|
|
514 max_alignment_length();
|
|
515 Example : $method_link_species_set->max_alignment_length(1000);
|
|
516 Description: get/set for attribute max_alignment_length
|
|
517 Returntype : integer
|
|
518 Exceptions :
|
|
519 Caller : general
|
|
520
|
|
521 =cut
|
|
522
|
|
# Getter/setter for the maximum alignment length, kept as the 'max_align' tag.
# A supplied argument is recorded through add_tag(). The value read back with
# get_value_for_tag() is returned unless it is false, in which case
# $DEFAULT_MAX_ALIGNMENT is returned instead.
sub max_alignment_length {
    my ($self, @args) = @_;

    if (@args) {
        $self->add_tag('max_align', $args[0]);
    }

    my $stored_length = $self->get_value_for_tag('max_align');

    return $stored_length ? $stored_length : $DEFAULT_MAX_ALIGNMENT;
}
532
|
|
533
|
|
534 =head2 toString
|
|
535
|
|
536 Args : (none)
|
|
537 Example : print $mlss->toString()."\n";
|
|
538 Description: returns a stringified representation of the method_link_species_set
|
|
539 Returntype : string
|
|
540
|
|
541 =cut
|
|
542
|
|
# Returns a one-line, human-readable description of this object: its class,
# dbID, name, source, url, max_alignment_length, and the stringified method
# and species set. Missing pieces are shown as '?' (dbID, max_alignment_length,
# method, species set) or as an empty string (name, source, url), so the
# method can safely be used for diagnostics on partially initialised objects.
sub toString {
    my $self = shift;

    # Unset text attributes are rendered as empty strings rather than
    # concatenating undef.
    my ($name, $source, $url) =
        map { defined($_) ? $_ : '' }
            ( scalar($self->name), scalar($self->source), scalar($self->url) );

    # Calling toString on an unset method / species set would die, so fall back to '?'.
    my $method      = $self->method;
    my $species_set = $self->species_set_obj;
    my $method_text      = $method      ? $method->toString      : '?';
    my $species_set_text = $species_set ? $species_set->toString : '?';

    return ref($self).": dbID=".($self->dbID || '?').
        ", name='".$name.
        "', source='".$source.
        "', url='".$url.
        "', max_alignment_length=".($self->max_alignment_length || '?').
        ", {".$method_text."} x {".$species_set_text."}";
}
553
|
|
554
|
|
555 1;
|