annotate variant_effect_predictor/Bio/EnsEMBL/AlignStrainSlice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::AlignStrainSlice - Represents the slice of the genome aligned with certain strains (applying the variations/indels)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $sa = $db->get_SliceAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 $slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 $strainSlice1 = $slice->get_by_Strain($strain_name1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 $strainSlice2 = $slice->get_by_Strain($strain_name2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 my @strainSlices;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 push @strainSlices, $strainSlice1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 push @strainSlices, $strainSlice2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 $alignSlice = Bio::EnsEMBL::AlignStrainSlice->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 -SLICE => $slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 -STRAINS => \@strainSlices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 # Get coordinates of variation in alignSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 my $alleleFeatures = $strainSlice1->get_all_AlleleFeature_Slice();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 foreach my $af ( @{$alleleFeatures} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 my $new_feature = $alignSlice->alignFeature( $af, $strainSlice1 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 print( "Coordinates of the feature in AlignSlice are: ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 $new_feature->start, "-", $new_feature->end, "\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 A AlignStrainSlice object represents a region of a genome align for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 certain strains. It can be used to align certain strains to a reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 slice.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 package Bio::EnsEMBL::AlignStrainSlice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 use Bio::EnsEMBL::Mapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 use Bio::EnsEMBL::Mapper::RangeRegistry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 Arg[1] : Bio::EnsEMBL::Slice $Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 Arg[2] : listref of Bio::EnsEMBL::StrainSlice $strainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Example : push @strainSlices, $strainSlice1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 push @strainSlices, $strainSlice2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 .....
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 push @strainSlices, $strainSliceN;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 $alignStrainSlice = Bio::EnsEMBL::AlignStrainSlice->new(-SLICE => $slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 -STRAIN => \@strainSlices);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Description : Creates a new Bio::EnsEMBL::AlignStrainSlice object that will contain a mapper between
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 the Slice object, plus all the indels from the different Strains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 ReturnType : Bio::EnsEMBL::AlignStrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 sub new{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 my $caller = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 my $class = ref($caller) || $caller;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 my ($slice, $strainSlices) = rearrange([qw(SLICE STRAINS)],@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 #check that both StrainSlice and Slice are identical (must have been defined in the same slice)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 foreach my $strainSlice (@{$strainSlices}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 if (($strainSlice->start != $slice->start) || ($strainSlice->end != $slice->end) || ($strainSlice->seq_region_name ne $slice->seq_region_name)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 warning("Not possible to create Align object from different Slices");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 return [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 return bless{'slice' => $slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 'strains' => $strainSlices}, $class;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 =head2 alignFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 Arg[1] : Bio::EnsEMBL::Feature $feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 Arg[2] : Bio::EnsEMBL::StrainSlice $strainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Example : $new_feature = $alignSlice->alignFeature($feature, $strainSlice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 Description : Creates a new Bio::EnsEMBL::Feature object that aligned to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 the AlignStrainSlice object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 ReturnType : Bio::EnsEMBL::Feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 sub alignFeature{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 my $feature = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 #check that the object is a Feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 throw("Bio::EnsEMBL::Feature object expected");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 #and align it to the AlignStrainSlice object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 my $mapper_strain = $self->mapper();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 my @results;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 if ($feature->start > $feature->end){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 #this is an Indel, map it with the special method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 @results = $mapper_strain->map_indel('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 #and modify the coordinates according to the length of the indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 $results[0]->end($results[0]->start + $feature->length_diff -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 @results = $mapper_strain->map_coordinates('Slice',$feature->start, $feature->end, $feature->strand,'Slice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 #get need start and end of the new feature, aligned ot AlignStrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 my @results_ordered = sort {$a->start <=> $b->start} @results;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 my %new_feature = %$feature; #make a shallow copy of the Feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 $new_feature{'start'}= $results_ordered[0]->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 $new_feature{'end'} = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 return bless \%new_feature, ref($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 #getter for the mapper between the Slice and the different StrainSlice objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 sub mapper{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 if (!defined $self->{'mapper'}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 #get the alleleFeatures in all the strains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 if (!defined $self->{'indels'}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 #when the list of indels is not defined, get them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 $self->{'indels'} = $self->_get_indels();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 my $indels = $self->{'indels'}; #gaps in reference slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 my $mapper = Bio::EnsEMBL::Mapper->new('Slice', 'AlignStrainSlice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 my $start_slice = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 my $end_slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 my $start_align = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 my $end_align;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 my $length_indel = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 my $length_acum_indel = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 foreach my $indel (@{$indels}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 $end_slice = $indel->[0] - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 $end_align = $indel->[0] - 1 + $length_acum_indel; #we must consider length previous indels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 $length_indel = $indel->[1] - $indel->[0] + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'AlignStrainSlice',$start_align,$end_align);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 $mapper->add_indel_coordinates('Slice',$end_slice + 1,$end_slice,1,'AlignStrainSlice',$end_align + 1,$end_align + $length_indel);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 $start_slice = $end_slice + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 $start_align = $indel->[1] + 1 + $length_acum_indel; #we must consider legnth previous indels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 $length_acum_indel += $length_indel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 if ($start_slice <= $self->length){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'AlignStrainSlice',$start_align,$start_align + $self->length - $start_slice)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 $self->{'mapper'} = $mapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 return $self->{'mapper'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 #returns the length of the AlignSlice: length of the Slice plus the gaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 sub length{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 my $length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 if (!defined $self->{'indels'}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 #when the list of indels is not defined, get them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 $self->{'indels'} = $self->_get_indels();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 $length = $self->{'slice'}->length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 map {$length += ($_->[1] - $_->[0] + 1)} @{$self->{'indels'}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 return $length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 =head2 strains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 Description: Returns list with all strains used to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 define this AlignStrainSlice object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 Returntype : listref of Bio::EnsEMBL::StrainSlice objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 sub strains{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 return $self->{'strains'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 =head2 Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 Description: Returns slice where the AlignStrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 is defined
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 Returntype : Bio::EnsEMBL::Slice object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 sub Slice{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 return $self->{'slice'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 #method to retrieve, in order, a list with all the indels in the different strains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 sub _get_indels{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 #go throuh all the strains getting ONLY the indels (length_diff <> 0)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 my @indels;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 foreach my $strainSlice (@{$self->strains}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 my $differences = $strainSlice->get_all_AlleleFeatures_Slice(); #need to check there are differences....
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 foreach my $af (@{$differences}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 #if length is 0, but is a -, it is still a gap in the strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 if (($af->length_diff != 0) || ($af->length_diff == 0 && $af->allele_string =~ /-/)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 push @indels, $af;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 #need to overlap the gaps using the RangeRegistry module
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 my $range_registry = Bio::EnsEMBL::Mapper::RangeRegistry->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 foreach my $indel (@indels){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 #in the reference and the strain there is a gap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 $range_registry->check_and_register(1,$indel->start,$indel->start) if ($indel->length_diff == 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 #deletion in reference slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $range_registry->check_and_register(1,$indel->start, $indel->end ) if ($indel->length_diff < 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 #insertion in reference slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 $range_registry->check_and_register(1,$indel->start,$indel->start + $indel->length_diff - 1) if ($indel->length_diff > 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 #and return all the gap coordinates....
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 return $range_registry->get_ranges(1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 =head2 get_all_Slices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 Description: This Slice is made of several Bio::EnsEMBL::StrainSlices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 sequence. This method returns these StrainSlices (or part of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 them) with the original coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 Returntype : listref of Bio::EnsEMBL::StrainSlice objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 Exceptions : end should be at least as big as start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 sub get_all_Slices {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 my @strains;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 #add the reference strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 my $dbVar = $self->Slice->adaptor->db->get_db_adaptor('variation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 unless($dbVar) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 warning("Variation database must be attached to core database to " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 "retrieve variation information" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 my $indAdaptor = $dbVar->get_IndividualAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 my $ref_name = $indAdaptor->get_reference_strain_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 my $ref_strain = Bio::EnsEMBL::StrainSlice->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 -START => $self->Slice->{'start'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 -END => $self->Slice->{'end'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 -STRAND => $self->Slice->{'strand'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 -ADAPTOR => $self->Slice->adaptor(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 -SEQ => $self->Slice->{'seq'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 -SEQ_REGION_NAME => $self->Slice->{'seq_region_name'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 -SEQ_REGION_LENGTH => $self->Slice->{'seq_region_length'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 -COORD_SYSTEM => $self->Slice->{'coord_system'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 -STRAIN_NAME => $ref_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 #this is a fake reference alisce, should not contain any alleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 undef $ref_strain->{'alleleFeatures'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 push @strains, @{$self->strains};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 my $new_feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 my $indel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 my $aligned_features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 my $indels = (); #reference to a hash containing indels in the different strains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 #we need to realign all Features in the different Slices and add '-' in the reference Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 foreach my $strain (@{$self->strains}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 foreach my $af (@{$strain->get_all_AlleleFeatures_Slice()}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 $new_feature = $self->alignFeature($af); #align feature in AlignSlice coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 push @{$aligned_features},$new_feature if($new_feature->seq_region_start <= $strain->end); #some features might map outside slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 if ($af->start != $af->end){ #an indel, need to add to the reference, and realign in the strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 #make a shallow copy of the indel - clear it first!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 $indel = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 %{$indel} = %{$new_feature};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 bless $indel, ref($new_feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 $indel->allele_string('-');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 push @{$indels},$indel; #and include in the list of potential indels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 next if (!defined $aligned_features);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 undef $strain->{'alleleFeatures'}; #remove all features before adding new aligned features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 push @{$strain->{'alleleFeatures'}}, @{$aligned_features};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 undef $aligned_features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 push @strains, $ref_strain;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 #need to add indels in the different strains, if not present
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 if (defined $indels){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 foreach my $strain (@strains){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 #inlcude the indels in the StrainSlice object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 push @{$strain->{'alignIndels'}},@{$indels};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 return \@strains;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 1;