comparison variant_effect_predictor/Bio/EnsEMBL/Compara/ConstrainedElement.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:21066c0abaf5
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =head1 NAME
20
21 Bio::EnsEMBL::Compara::ConstrainedElement - constrained element data produced by Gerp
22
23 =head1 SYNOPSIS
24
25 use Bio::EnsEMBL::Compara::ConstrainedElement;
26
27 my $constrained_element = new Bio::EnsEMBL::Compara::ConstrainedElement(
28 -adaptor => $constrained_element_adaptor,
29 -method_link_species_set_id => $method_link_species_set_id,
30 -reference_dnafrag_id => $dnafrag_id,
31 -score => 56.2,
32 -p_value => '1.203e-6',
33 -alignment_segments => [ [$dnafrag1_id, $start, $end, $genome_db_id, $dnafrag1_name ], [$dnafrag2_id, ... ], ... ],
34 );
35
36 GET / SET VALUES
37 $constrained_element->adaptor($constrained_element_adaptor);
38 $constrained_element->dbID($constrained_element_id);
39 $constrained_element->method_link_species_set_id($method_link_species_set_id);
40 $constrained_element->score(56.2);
41 $constrained_element->p_value('5.62e-9');
42 $constrained_element->alignment_segments([ [$dnafrag_id, $start, $end, $genome_db_id, $dnafrag_name ], ... ]);
43 $constrained_element->slice($slice);
44 $constrained_element->start($constrained_element_start - $slice_start + 1);
45 $constrained_element->end($constrained_element_end - $slice_start + 1);
46 $constrained_element->seq_region_start($self->slice->start + $self->{'start'} - 1);
47 $constrained_element->seq_region_end($self->slice->start + $self->{'end'} - 1);
48 $constrained_element->strand($strand);
49 $constrained_element->reference_dnafrag_id($dnafrag_id);
50
51 =head1 OBJECT ATTRIBUTES
52
53 =over
54
55 =item dbID
56
57 corresponds to constrained_element.constrained_element_id
58
59 =item adaptor
60
61 Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object to access DB
62
63 =item method_link_species_set_id
64
65 corresponds to method_link_species_set.method_link_species_set_id (external ref.)
66
67 =item score
68
69 corresponds to constrained_element.score
70
71 =item p_value
72
73 corresponds to constrained_element.p_value
74
75 =item slice
76
77 corresponds to a Bio::EnsEMBL::Slice
78
79 =item start
80
81 corresponds to a constrained_element.dnafrag_start (in slice coordinates)
82
83 =item end
84
85 corresponds to a constrained_element.dnafrag_end (in slice coordinates)
86
87 =item seq_region_start
88
89 corresponds to a constrained_element.dnafrag_start (in genomic (absolute) coordinates)
90
91 =item seq_region_end
92
93 corresponds to a constrained_element.dnafrag_end (in genomic (absolute) coordinates)
94
95 =item strand
96
97 corresponds to a constrained_element.strand
98
99 =item $alignment_segments
100
101 listref of listrefs (each of which contain 5 strings (dnafrag.dnafrag_id, constrained_element.dnafrag_start,
102 constrained_element.dnafrag_end, constrained_element.strand, genome_db.genome_db_id, dnafrag.dnafrag_name)
103 [ [ $dnafrag_id, $start, $end, $genome_db_id, $dnafrag_name ], .. ]
104 Each inner listref contains information about one of the species sequences which make up the constarained
105 element block from the alignment.
106
107 =back
108
109 =head1 APPENDIX
110
111 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
112
113 =cut
114
115
116 # Let the code begin...
117
118
119 package Bio::EnsEMBL::Compara::ConstrainedElement;
120 use strict;
121
122 # Object preamble
123 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
124 use Bio::EnsEMBL::Utils::Exception qw(throw warning info deprecate verbose);
125 use Bio::EnsEMBL::Compara::DnaFrag;
126 use Bio::SimpleAlign;
127 use Data::Dumper;
128
129
130 =head2 new (CONSTRUCTOR)
131
132 Arg [-dbID] : int $dbID (the database ID for
133 the constrained element block for this object)
134 Arg [-ADAPTOR]
135 : (opt.) Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor $adaptor
136 (the adaptor for connecting to the database)
137 Arg [-METHOD_LINK_SPECIES_SET_ID]
138 : int $mlss_id (the database internal ID for the $mlss)
139 Arg [-SCORE]
140 : float $score (the score of this alignment)
141 Arg [-ALIGNMENT_SEGMENTS]
142 : (opt.) listref of listrefs which each contain 5 values
143 [ [ $dnafrag_id, $dnafrag_start, $dnafrag_end, $genome_db_id, $dnafrag_name ], ... ]
144 corresponding to the all the species in the constrained element block.
145 Arg [-P_VALUE]
146 : (opt.) string $p_value (the p_value of this constrained element)
147 Arg [-SLICE]
148 : (opt.) Bio::EnsEMBL::Slice object
149 Arg [-START]
150 : (opt.) int ($dnafrag_start - Bio::EnsEMBL::Slice->start + 1).
151 Arg [-END]
152 : (opt.) int ($dnafrag_end - Bio::EnsEMBL::Slice->start + 1).
153 Arg [-STRAND]
154 : (opt.) int (the strand from the genomic_align).
155 Arg [-REFERENCE_DNAFRAG_ID]
156 : (opt.) int $dnafrag_id of the slice or dnafrag
157
158 Example : my $constrained_element =
159 new Bio::EnsEMBL::Compara::ConstrainedElement(
160 -dbID => $constrained_element_id,
161 -adaptor => $adaptor,
162 -method_link_species_set_id => $method_link_species_set_id,
163 -score => 28.2,
164 -alignment_segments => [ [ $dnafrag_id, $dnafrag_start, $dnafrag_end, $genome_db_id, $dnafrag_name ], .. ],
165 #danfarg_[start|end|id] from constrained_element table
166 -p_value => '5.023e-6',
167 -slice => $slice_obj,
168 -start => ( $dnafrag_start - $slice_obj->start + 1),
169 -end => ( $dnafrag_end - $slice_obj->start + 1),
170 -strand => $strand,
171 -reference_dnafrag_id => $dnafrag_id,
172 );
173 Description: Creates a new ConstrainedElement object
174 Returntype : Bio::EnsEMBL::Compara::DBSQL::ConstrainedElement
175 Exceptions : none
176 Caller : general
177
178 =cut
179
180 sub new {
181 my($class, @args) = @_;
182
183 my $self = {};
184 bless $self,$class;
185
186 my ($adaptor, $dbID, $alignment_segments,
187 $method_link_species_set_id, $score, $p_value,
188 $slice, $start, $end, $strand, $reference_dnafrag_id) =
189 rearrange([qw(
190 ADAPTOR DBID ALIGNMENT_SEGMENTS
191 METHOD_LINK_SPECIES_SET_ID SCORE P_VALUE
192 SLICE START END STRAND REFERENCE_DNAFRAG_ID
193 )],
194 @args);
195
196 $self->adaptor($adaptor) if (defined ($adaptor));
197 $self->dbID($dbID)
198 if (defined ($dbID));
199 $self->method_link_species_set_id($method_link_species_set_id)
200 if (defined ($method_link_species_set_id));
201 $self->alignment_segments($alignment_segments)
202 if (defined ($alignment_segments));
203 $self->score($score) if (defined ($score));
204 $self->p_value($p_value) if (defined ($p_value));
205 $self->slice($slice) if (defined ($slice));
206 $self->start($start) if (defined ($start));
207 $self->end($end) if (defined ($end));
208 $self->strand($strand) if (defined ($strand));
209 $self->reference_dnafrag_id($reference_dnafrag_id)
210 if (defined($reference_dnafrag_id));
211 return $self;
212 }
213
214 sub new_fast {
215 my $class = shift;
216 my $hashref = shift;
217
218 return bless $hashref, $class;
219 }
220
221 =head2 adaptor
222
223 Arg [1] : Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor
224 Example : my $cons_ele_adaptor = $constrained_element->adaptor();
225 Example : $cons_ele_adaptor->adaptor($cons_ele_adaptor);
226 Description: Getter/Setter for the adaptor this object uses for database
227 interaction.
228 Returntype : Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object
229 Exceptions : thrown if $adaptor is not a
230 Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object
231 Caller : general
232
233 =cut
234
235 sub adaptor {
236 my ($self, $adaptor) = @_;
237
238 if (defined($adaptor)) {
239 throw("$adaptor is not a Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor object")
240 unless ($adaptor->isa("Bio::EnsEMBL::Compara::DBSQL::ConstrainedElementAdaptor"));
241 $self->{'adaptor'} = $adaptor;
242 }
243
244 return $self->{'adaptor'};
245 }
246
247 =head2 dbID
248
249 Arg [1] : integer $dbID
250 Example : my $dbID = $constrained_element->dbID();
251 Example : $constrained_element->dbID(2);
252 Description: Getter/Setter for the attribute dbID
253 Returntype : integer
254 Exceptions : returns undef if no ref.dbID
255 Caller : general
256
257 =cut
258
259 sub dbID {
260 my ($self, $dbID) = @_;
261
262 if (defined($dbID)) {
263 $self->{'dbID'} = $dbID;
264 }
265
266 return $self->{'dbID'};
267 }
268
269
270 =head2 p_value
271
272 Arg [1] : float $p_value
273 Example : my $p_value = $constrained_element->p_value();
274 Example : $constrained_element->p_value('5.35242e-105');
275 Description: Getter/Setter for the attribute p_value
276 Returntype : float
277 Exceptions : returns undef if no ref.p_value
278 Caller : general
279
280 =cut
281
282 sub p_value {
283 my ($self, $p_value) = @_;
284
285 if (defined($p_value)) {
286 $self->{'p_value'} = $p_value;
287 }
288
289 return $self->{'p_value'};
290 }
291
292
293 =head2 score
294
295 Arg [1] : float $score
296 Example : my $score = $constrained_element->score();
297 Example : $constrained_element->score(16.8);
298 Description: Getter/Setter for the attribute score
299 Returntype : float
300 Exceptions : returns undef if no ref.score
301 Caller : general
302
303 =cut
304
305 sub score {
306 my ($self, $score) = @_;
307
308 if (defined($score)) {
309 $self->{'score'} = $score;
310 }
311 return $self->{'score'};
312 }
313
314 =head2 method_link_species_set_id
315
316 Arg [1] : integer $method_link_species_set_id
317 Example : $method_link_species_set_id = $constrained_element->method_link_species_set_id;
318 Example : $constrained_element->method_link_species_set_id(3);
319 Description: Getter/Setter for the attribute method_link_species_set_id.
320 Returntype : integer
321 Exceptions : returns undef if no ref.method_link_species_set_id
322 Caller : object::methodname
323
324 =cut
325
326 sub method_link_species_set_id {
327 my ($self, $method_link_species_set_id) = @_;
328
329 if (defined($method_link_species_set_id)) {
330 $self->{'method_link_species_set_id'} = $method_link_species_set_id;
331 }
332
333 return $self->{'method_link_species_set_id'};
334 }
335
336 =head2 alignment_segments
337
338 Arg [1] : listref $alignment_segments [ [ $dnafrag_id, $start, $end, $genome_db_id, $dnafrag_name ], .. ]
339 Example : my $alignment_segments = $constrained_element->alignment_segments();
340 $constrained_element->alignment_segments($alignment_segments);
341 Description: Getter/Setter for the attribute alignment_segments
342 Returntype : listref
343 Exceptions : returns undef if no ref.alignment_segments
344 Caller : general
345
346 =cut
347
348 sub alignment_segments {
349 my ($self, $alignment_segments) = @_;
350
351 if (defined($alignment_segments)) {
352 $self->{'alignment_segments'} = $alignment_segments;
353 }
354
355 return $self->{'alignment_segments'};
356 }
357
358
359 =head2 slice
360
361 Arg [1] : Bio::EnsEMBL::Slice $slice
362 Example : $slice = $constrained_element->slice;
363 Example : $constrained_element->slice($slice);
364 Description: Getter/Setter for the attribute slice.
365 Returntype : Bio::EnsEMBL::Slice object
366 Exceptions : returns undef if no ref.slice
367 Caller : object::methodname
368
369 =cut
370
371 sub slice {
372 my ($self, $slice) = @_;
373
374 if (defined($slice)) {
375 $self->{'slice'} = $slice;
376 }
377
378 return $self->{'slice'};
379 }
380
381 =head2 start
382
383 Arg [1] : (optional) int $start
384 Example : $start = $constrained_element->start;
385 Example : $constrained_element->start($start);
386 Description: Getter/Setter for the attribute start.
387 Returntype : int
388 Exceptions : returns undef if no ref.start
389 Caller : object::methodname
390
391 =cut
392
393 sub start {
394 my ($self, $start) = @_;
395
396 if (defined($start)) {
397 $self->{'start'} = $start;
398 }
399
400 return $self->{'start'};
401 }
402
403 =head2 end
404
405 Arg [1] : (optional) int $end
406 Example : $end = $constrained_element->end;
407 Example : $constrained_element->end($end);
408 Description: Getter/Setter for the attribute end relative to the begining of the slice.
409 Returntype : int
410 Exceptions : returns undef if no ref.end
411 Caller : object::methodname
412
413 =cut
414
415 sub end {
416 my ($self, $end) = @_;
417
418 if (defined($end)) {
419 $self->{'end'} = $end;
420 }
421
422 return $self->{'end'};
423 }
424
425
426 =head2 seq_region_start
427
428 Arg [1] : (optional) int $seq_region_start
429 Example : $seq_region_start = $constrained_element->seq_region_start;
430 Example : $constrained_element->seq_region_start($seq_region_start);
431 Description: Getter/Setter for the attribute start relative to the begining of the dnafrag (genomic coords).
432 Returntype : int
433 Exceptions : returns undef if no ref.seq_region_start
434 Caller : object::methodname
435
436 =cut
437 sub seq_region_start {
438 my ($self, $seq_region_start) = @_;
439
440 if(defined($seq_region_start)) {
441 $self->{'seq_region_start'} = $seq_region_start;
442 } else {
443 $self->{'seq_region_start'} = $self->slice->start + $self->{'start'} - 1;
444 }
445 return $self->{'seq_region_start'};
446 }
447
448
449 =head2 seq_region_end
450
451 Arg [1] : (optional) int $seq_region_end
452 Example : $seq_region_end = $constrained_element->seq_region_end
453 Example : $constrained_element->seq_region_end($seq_region_end);
454 Description: Getter/Setter for the attribute end relative to the begining of the dnafrag (genomic coords).
455 Returntype : int
456 Exceptions : returns undef if no ref.seq_region_end
457 Caller : object::methodname
458
459 =cut
460 sub seq_region_end {
461 my ($self, $seq_region_end) = @_;
462
463 if(defined($seq_region_end)) {
464 $self->{'seq_region_end'} = $seq_region_end;
465 } else {
466 $self->{'seq_region_end'} = $self->slice->start + $self->{'end'} - 1;
467 }
468 return $self->{'seq_region_end'};
469 }
470
471
472
473 =head2 strand
474
475 Arg [1] : (optional) int $stand$
476 Example : $end = $constrained_element->strand;
477 Example : $constrained_element->end($strand);
478 Description: Getter/Setter for the attribute genomic_align strand.
479 Returntype : int
480 Exceptions : returns undef if no ref.strand
481 Caller : object::methodname
482
483 =cut
484
485 sub strand {
486 my ($self, $strand) = @_;
487
488 if (defined($strand)) {
489 $self->{'strand'} = $strand;
490 }
491
492 return $self->{'strand'};
493 }
494
495 =head2 reference_dnafrag_id
496
497 Arg [1] : (optional) int $reference_dnafrag_id
498 Example : $dnafrag_id = $constrained_element->reference_dnafrag_id;
499 Example : $constrained_element->reference_dnafrag_id($dnafrag_id);
500 Description: Getter/Setter for the attribute end.
501 Returntype : int
502 Exceptions : returns undef if no ref.reference_dnafrag_id
503 Caller : object::methodname
504
505 =cut
506
507 sub reference_dnafrag_id {
508 my ($self, $reference_dnafrag_id) = @_;
509
510 if (defined($reference_dnafrag_id)) {
511 $self->{'reference_dnafrag_id'} = $reference_dnafrag_id;
512 }
513
514 return $self->{'reference_dnafrag_id'};
515 }
516
517 =head2 get_SimpleAlign
518
519 Arg [1] : Optional flags for formatting displayed MSA
520 Example : my $out = Bio::AlignIO->newFh(-fh=>\*STDOUT, -format=> "clustalw");
521 my $cons = $ce_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $slice);
522 foreach my $constrained_element(@{ $cons }) {
523 my $simple_align = $constrained_element->get_SimpleAlign("uc");
524 print $out $simple_align;
525 }
526 Description: Rebuilds the constrained element alignment
527 Returntype : Bio::SimpleAlign object
528 Exceptions : throw if you can not get a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object from the constrained element
529 Caller : object::methodname
530
531 =cut
532
533 sub get_SimpleAlign {
534 my ($self, @flags) = @_;
535
536 my $mlss_adaptor = $self->adaptor->db->get_MethodLinkSpeciesSet;
537
538 my $cons_eles_mlss = $mlss_adaptor->fetch_by_dbID($self->method_link_species_set_id());
539
540 if (defined($cons_eles_mlss)) {
541 throw("$cons_eles_mlss is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
542 unless ($cons_eles_mlss->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
543 } else {
544 throw("unable to get a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object from this constrained element");
545 }
546
547 my $msa_mlss_id = $cons_eles_mlss->get_tagvalue("msa_mlss_id"); # The mlss_id of the alignments from which the constrained elements were generated
548
549 my $msa_mlss = $mlss_adaptor->fetch_by_dbID( $msa_mlss_id );
550
551 # setting the flags
552 my $skip_empty_GenomicAligns = 1;
553 my $uc = 0;
554 my $translated = 0;
555
556 for my $flag ( @flags ) {
557 $uc = 1 if ($flag =~ /^uc$/i);
558 $translated = 1 if ($flag =~ /^translated$/i);
559 }
560
561 my $genomic_align_block_adaptor = $self->adaptor->db->get_GenomicAlignBlock;
562 $self->start(1) if $self->start <= 0;
563 my $gabs = $genomic_align_block_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice(
564 $msa_mlss, $self->slice->sub_Slice($self->start, $self->end, $self->slice->strand));
565
566 my $sa = Bio::SimpleAlign->new();
567
568 warn "should be only one genomic_align_block associated with each constrained element\n" if @$gabs > 1;
569
570 my $this_genomic_align_block = $gabs->[0];
571 my $bio07 = 0;
572 if(!$sa->can('add_seq')) {
573 $bio07 = 1;
574 }
575 my $reference_genomic_align = $this_genomic_align_block->reference_genomic_align();
576
577 my $restricted_gab = $this_genomic_align_block->restrict_between_reference_positions(
578 ($self->slice->start + $self->start - 1),
579 ($self->slice->start + $self->end - 1),
580 $reference_genomic_align,
581 $skip_empty_GenomicAligns);
582 print "dbID: ", $this_genomic_align_block->dbID, ". ";
583 foreach my $genomic_align( @{ $restricted_gab->get_all_GenomicAligns } ) {
584 my $alignSeq = $genomic_align->aligned_sequence;
585 my $loc_seq = Bio::LocatableSeq->new(
586 -SEQ => $uc ? uc $alignSeq : lc $alignSeq,
587 -START => $genomic_align->dnafrag_start,
588 -END => $genomic_align->dnafrag_end,
589 -ID => $genomic_align->dnafrag->genome_db->name . "/" . $genomic_align->dnafrag->name,
590 -STRAND => $genomic_align->dnafrag_strand);
591
592 if($bio07) {
593 $sa->addSeq($loc_seq);
594 }else{
595 $sa->add_seq($loc_seq);
596 }
597 }
598 return $sa;
599 }
600
601 =head2 summary_as_hash
602
603 Example : $constrained_summary = $constrained_element->summary_as_hash();
604 Description : Retrieves a textual summary of this ConstrainedElement object.
605 Sadly not descended from Feature, so certain attributes must be explicitly requested
606 Returns : hashref of descriptive strings
607
608 =cut
609
610 sub summary_as_hash {
611 my $self = shift;
612 my $summary_ref;
613 $summary_ref->{'ID'} = $self->dbID;
614 $summary_ref->{'start'} = $self->seq_region_start;
615 $summary_ref->{'end'} = $self->seq_region_end;
616 $summary_ref->{'strand'} = $self->strand;
617 $summary_ref->{'seq_region_name'} = $self->slice->seq_region_name;
618 $summary_ref->{'score'} = $self->score;
619 return $summary_ref;
620 }
621
622 1;