annotate variant_effect_predictor/Bio/EnsEMBL/StrainSlice.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::StrainSlice - SubClass of the Slice. Represents the slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 of the genome for a certain strain (applying the variations)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 $sa = $db->get_SliceAdaptor;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 $slice =
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 $strainSlice = $slice->get_by_strain($strain_name);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35 # get the sequence from the Strain Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 my $seq = $strainSlice->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 print $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 # get allele features between this StrainSlice and the reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 my $afs = $strainSlice->get_all_AlleleFeatures_Slice();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41 foreach my $af ( @{$afs} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 print "AlleleFeature in position ", $af->start, "-", $af->end,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 " in strain with allele ", $af->allele_string, "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46 # compare a strain against another strain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47 my $strainSlice_2 = $slice->get_by_strain($strain_name_2);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 my $differences =
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 $strainSlice->get_all_differences_StrainSlice($strainSlice_2);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 foreach my $difference ( @{$differences} ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52 print "Difference in position ", $difference->start, "-",
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 $difference->end(), " in strain with allele ",
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54 $difference->allele_string(), "\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59 A StrainSlice object represents a region of a genome for a certain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 strain. It can be used to retrieve sequence or features from a strain.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 =head1 METHODS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 package Bio::EnsEMBL::StrainSlice;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67 use vars qw(@ISA);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72 use Bio::EnsEMBL::Slice;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 use Bio::EnsEMBL::Mapper;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76 @ISA = qw(Bio::EnsEMBL::Slice);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 =head2 new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81 Arg[1] : Bio::EnsEMBL::Slice $slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82 Arg[2] : string $strain_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 Example : $strainSlice = Bio::EnsEMBL::StrainSlice->new(-.... => ,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 -strain_name => $strain_name);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 Description : Creates a new Bio::EnsEMBL::StrainSlice object that will contain a shallow copy of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 Slice object, plus additional information such as the Strain this Slice refers to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88 reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 ReturnType : Bio::EnsEMBL::StrainSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 sub new{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96 my $caller = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 my $class = ref($caller) || $caller;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99 my ($strain_name) = rearrange(['STRAIN_NAME'],@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 my $self = $class->SUPER::new(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 $self->{'strain_name'} = $strain_name;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105 if(!$self->adaptor()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 warning('Cannot get new StrainSlice features without attached adaptor');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107 return '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
111 unless($variation_db) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
112 warning("Variation database must be attached to core database to " .
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113 "retrieve variation information" );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114 return '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 if( $af_adaptor ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120 #get the Individual for the given strain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121 my $ind_adaptor = $variation_db->get_IndividualAdaptor;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123 if ($ind_adaptor){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124 my $individual = shift @{$ind_adaptor->fetch_all_by_name($self->{'strain_name'})}; #the name should be unique for a strain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125 #check that the individua returned isin the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127 if (defined $individual){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128 my $allele_features = $af_adaptor->fetch_all_by_Slice($self,$individual);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
129 #warning("No strain genotype data available for Slice ".$self->name." and Strain ".$individual->name) if ! defined $allele_features->[0];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
130 my $vf_ids = {}; #hash containing the relation vf_id->af
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131 $self->{'_strain'} = $individual;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132 map {defined $_->{'_variation_feature_id'} ? $vf_ids->{$_->{'_variation_feature_id'}} = $_ : ''
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133 } @{$allele_features};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134 # my $new_allele_features = $self->_filter_af_by_coverage($allele_features);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135 # $self->{'alleleFeatures'} = $new_allele_features;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136 $self->{'alleleFeatures'} = $allele_features || [];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137 $self->{'_vf_ids'} = $vf_ids;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138 return $self;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140 else{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141 warning("Strain ($self->{strain_name}) not in the database");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142 return $self;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145 else{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 warning("Not possible to retrieve IndividualAdaptor from the variation database");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147 return '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150 warning("Not possible to retrieve VariationFeatureAdaptor from variation database");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151 return '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155 =head2 _filter_af_by_coverage
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157 Arg [1] : listref to Bio::EnsEMBL::Variation::AlleleFeatures $allele_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158 Example : my $new_list_allele_features = $strainSlice->_filter_af_by_coverage($allele_features);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159 Description : For a list of allele features, gets a new list where they are filter depending on coverage
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162 Caller : internal function
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166 sub _filter_af_by_coverage{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
168 my $allele_features = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
169
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
170 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
171
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172 unless($variation_db) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
173 warning("Variation database must be attached to core database to " .
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
174 "retrieve variation information" );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
175 return '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
176 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
177
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
178 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
179 #this is ugly, but ReadCoverage is always defined in the positive strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
180
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
181 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182 ### passing 1 will only get you the coverage of level 1
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183 ### by omitting the parameter we take into account all coverage regions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186 my $new_af;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187 foreach my $af (@{$allele_features}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 foreach my $rc (@{$rcs}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189 if ($af->start <= $rc->end and $af->start >= $rc->start){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190 push @{$new_af}, $af;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 last;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
195
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
196 return $new_af;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
197 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
198
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
199
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
200 =head2 strain_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
201
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
202 Arg [1] : (optional) string $strain_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
203 Example : my $strain_name = $strainSlice->strain_name();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
204 Description : Getter/Setter for the name of the strain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
205 ReturnType : string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
206 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
207 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
208
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
209 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
210
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
211 sub strain_name{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
212 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
213 if (@_){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
214 $self->{'strain_name'} = shift @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
215 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
216 return $self->{'strain_name'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
217 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
218
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
219
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
220 =head2 display_Slice_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
221
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
222 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
223 Example : my $strain_name = $strainSlice->display_Slice_name();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
224 Description : Getter for the name of the strain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
225 ReturnType : string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
226 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
227 Caller : webteam
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
228
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
229 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
230
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
231 sub display_Slice_name{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
232 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234 return $self->strain_name;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
237 =head2 seq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
238
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
239 Arg [1] : int $with_coverage (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
240 Example : print "SEQUENCE = ", $strainSlice->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241 Description: Returns the sequence of the region represented by this
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
242 slice formatted as a string in the strain. If flag with_coverage
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
243 is set to 1, returns sequence if there is coverage in the region
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
244 Returntype : string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
247
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
248 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
249
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250 sub seq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252 my $with_coverage = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
253
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
254 $with_coverage ||= 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
255
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
256 # special case for in-between (insert) coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
257 return '' if($self->start() == $self->end() + 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
258
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
259 return $self->{'seq'} if($self->{'seq'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
260
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
261 if($self->adaptor()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
262 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
263 my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
264
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
265 #apply all differences to the reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
266 #first, in case there are any indels, create the new sequence (containing the '-' bases)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
267 # sort edits in reverse order to remove complication of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
268 # adjusting downstream edits
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
269 my @indels_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
270
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
271 foreach my $vf (@indels_ordered){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
272 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
273 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
274
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
275 #need to find coverage information if diffe
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
276 # sort edits in reverse order to remove complication of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
277 # adjusting downstream edits
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
278 my @variation_features_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
279
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
280 foreach my $vf (@variation_features_ordered){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
281 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
282 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
283
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
284 #need to find coverage information if different from reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
285 my $indAdaptor = $self->adaptor->db->get_db_adaptor('variation')->get_IndividualAdaptor;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
286 my $ref_strain = $indAdaptor->get_reference_strain_name;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
287 $self->_add_coverage_information($reference_sequence) if ($with_coverage == 1 && $self->strain_name ne $ref_strain);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
288 return substr(${$reference_sequence},0,1) if ($self->length == 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
289 return substr(${$reference_sequence},0,$self->expanded_length); #returns the reference sequence, applying the variationFeatures. Remove additional bases added due to indels
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
290 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292 # no attached sequence, and no db, so just return Ns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293 return 'N' x $self->length();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 sub expanded_length() {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299 my $length = $self->SUPER::length();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
300
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
301 foreach my $af(@{$self->{'alleleFeatures'}}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
302 $length += $af->length_diff() if $af->length_diff > 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
303 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
304
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
305 return $length;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
306 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
307
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
308
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
309
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
310 sub _add_coverage_information{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
311 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
312 my $reference_sequence = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
313
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
314 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
315
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
316 unless($variation_db) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
317 warning("Variation database must be attached to core database to " .
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
318 "retrieve variation information" );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
319 return '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
320 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
321
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
322 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
323 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
324 ### passing 1 will only get you the coverage of level 1
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
325 ### by omitting the parameter we take into account all coverage regions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
326 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
327 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328 my $rcs_sorted;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329 @{$rcs_sorted} = sort {$a->start <=> $b->start} @{$rcs} if ($self->strand == -1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330 $rcs = $rcs_sorted if ($self->strand == -1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 my $start = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334 # wm2 - new code to mask sequence, instead starts with masked string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 # and unmasks seq where there is read coverage
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337 # get all length-changing vars
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 my @indels_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340 my $masked_seq = '~' x length($$reference_sequence);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342 foreach my $rc(@{$rcs}) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 my ($start, $end) = ($rc->start, $rc->end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345 # adjust region for indels
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346 foreach my $indel(@indels_ordered) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347 next if $rc->start > $end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
349 # if within RC region, only need adjust the end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
350 $start += $indel->length_diff unless $indel->start > $start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351 $end += $indel->length_diff;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354 # adjust coords for seq boundaries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 $start = 1 if $start < 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356 $end = CORE::length($masked_seq) if $end > CORE::length($masked_seq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
357
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358 # now unmask the sequence using $$reference_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 substr($masked_seq, $start - 1, $end - $start + 1) = substr($$reference_sequence, $start - 1, $end - $start + 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
361
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
362 # wm2 - old code, starts with sequence and masks regions between read coverage - BUGGY
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
363 # foreach my $rc (@{$rcs}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
364 # $rc->start(1) if ($rc->start < 0); #if the region lies outside the boundaries of the slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
365 # $rc->end($self->end - $self->start + 1) if ($rc->end + $self->start > $self->end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
366 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
367 # warn "Adjusted: ", $rc->start, "-", $rc->end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
368 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
369 # warn "Covering from ", $start, " over ", ($rc->start - $start - 1), " bases";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
370 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
371 # substr($$reference_sequence, $start-1,($rc->start - $start - 1),'~' x ($rc->start - $start - 1)) if ($rc->start - 1 > $start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
372 # $start = $rc->end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
373 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
374 # }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
375 # substr($$reference_sequence, $start, ($self->length - $start) ,'~' x ($self->length - $start)) if ($self->length -1 > $start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
376
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
377 # copy the masked sequence to the reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
378 $$reference_sequence = $masked_seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
379 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
380
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
381
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
382 =head2 get_AlleleFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
383
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
384 Arg[1] : Bio::EnsEMBL::Variation::VariationFeature $vf
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
385 Example : my $af = $strainSlice->get_AlleleFeature($vf);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
386 Description : Returns the AlleleFeature object associated with the VariationFeature (if any)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
387 ReturnType : Bio::EnsEMBL::Variation::AlleleFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
388 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
389 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
390
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
391 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
392
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
393 sub get_AlleleFeature{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
394 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
395 my $vf = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
396
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
397 my $af;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
398 #look at the hash containing the relation vf_id->alleleFeature, if present, return object, otherwise, undef
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
399 $af = $self->{'_vf_ids'}->{$vf->dbID} if (defined $self->{'_vf_ids'}->{$vf->dbID});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
400 return $af;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
401 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
402
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
403
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
404 =head2 get_all_AlleleFeatures_Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
405
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
406 Arg[1] : int $with_coverage (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
407 Example : my $af = $strainSlice->get_all_AlleleFeatures_Slice()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
408 Description : Gets all AlleleFeatures between the StrainSlice object and the Slice is defined.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
409 If argument $with_coverage set to 1, returns only AF if they have coverage information
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
410 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
411 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
412 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
413
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
414 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
415
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
416 sub get_all_AlleleFeatures_Slice{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
417 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
418 my $with_coverage = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
419
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
420 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
421
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
422 unless($variation_db) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
423 warning("Variation database must be attached to core database to " .
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
424 "retrieve variation information" );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
425 return '';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
426 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
427 my $indAdaptor = $variation_db->get_IndividualAdaptor();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
428 my $ref_name = $indAdaptor->get_reference_strain_name;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
429 return [] if ($self->strain_name eq $ref_name);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
430 $with_coverage ||= 0; #by default, get all AlleleFeatures
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
431 if ($with_coverage == 1){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
432 my $new_allele_features = $self->_filter_af_by_coverage($self->{'alleleFeatures'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
433 return $new_allele_features || [];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
434 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
435
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
436 return $self->{'alleleFeatures'} || [];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
437 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
438
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
439 =head2 get_all_differences_StrainSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
440
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
441 Arg[1] : Bio::EnsEMBL::StrainSlice $ss
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
442 Example : my $differences = $strainSlice->get_all_differences_StrainSlice($ss)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
443 Description : Gets differences between 2 StrainSlice objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
444 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
445 Exceptions : thrown on bad argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
446 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
447
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
448 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
449
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
450 sub get_all_differences_StrainSlice{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
451 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
452 my $strainSlice = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
453
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
454 if (!ref($strainSlice) || !$strainSlice->isa('Bio::EnsEMBL::StrainSlice')){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
455 throw('Bio::EnsEMBL::StrainSlice arg expected');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
456 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
457 if ( @{$self->{'alleleFeatures'}} == 0 && @{$strainSlice->{'alleleFeatures'}} == 0){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
458 return undef; #there are no differences in any of the Strains
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
459
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
460 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
461 my $differences; #differences between strains
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
462 if (@{$strainSlice->{'alleleFeatures'}} == 0){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
463 #need to create a copy of VariationFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
464 foreach my $difference (@{$self->{'alleleFeatures'}}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
465 my %vf = %$difference;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
466 push @{$differences},bless \%vf,ref($difference);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
467 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
468 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
469 elsif (@{$self->{'alleleFeatures'}} == 0){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
470 #need to create a copy of VariationFeature, but changing the allele by the allele in the reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
471 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
472 push @{$differences}, $strainSlice->_convert_difference($difference);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
473 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
474 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
475 else{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
476 #both strains have differences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
477 #create a hash with the differences in the self strain slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
478 my %variation_features_self = map {$_->start => $_} @{$self->{'alleleFeatures'}};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
479 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
480 #there is no difference in the other strain slice, convert the allele
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
481 if (!defined $variation_features_self{$difference->start}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
482 push @{$differences},$strainSlice->_convert_difference($difference);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
483 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
484 else{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
485 #if it is defined and have the same allele, delete from the hash
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
486 if ($variation_features_self{$difference->start}->allele_string eq $difference->allele_string){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
487 delete $variation_features_self{$difference->start};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
488 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
489 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
490 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
491 #and copy the differences that in the self
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
492 foreach my $difference (values %variation_features_self){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
493 my %vf = %$difference;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
494 push @{$differences},bless \%vf,ref($difference);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
495 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
496
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
497 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
498 #need to map differences to the self
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
499 my $mapper = $self->mapper(); #now that we have the differences, map them in the StrainSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
500 # print Dumper($mapper);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
501 my @results;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
502 foreach my $difference (@{$differences}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
503 @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
504 #we can have 3 possibilities:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
505 #the difference is an insertion and when mapping returns the boundaries of the insertion in the StrainSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
506 if (@results == 2){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
507 #the first position in the result is the beginning of the insertion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
508 if($results[0]->start < $results[1]->start){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
509 $difference->start($results[0]->end+1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
510 $difference->end($results[1]->start-1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
511 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
512 else{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
513 $difference->start($results[1]->end+1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
514 $difference->end($results[0]->start-1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
515 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
516 $difference->strand($results[0]->strand);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
517 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
518 else{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
519 #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
520 # or a Bio::EnsEMBL::Mapper::IndelCoordinate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
521 # print "Difference: ",$difference->start, "-", $difference->end,"strand ",$difference->strand,"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
522 $difference->start($results[0]->start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
523 $difference->end($results[0]->end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
524 $difference->strand($results[0]->strand);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
525 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
526 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
527
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
528 return $differences;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
529 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
530
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
531 #for a given VariationFeatures, converts the allele into the reference allele and returns a new list with
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
532 #the converted VariationFeatures
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
533 sub _convert_difference{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
534 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
535 my $difference = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
536 my %new_vf = %$difference; #make a copy of the variationFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
537 #and change the allele with the one from the reference Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
538 $new_vf{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
539 return bless \%new_vf,ref($difference);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
540 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
541
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
542 =head2 sub_Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
543
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
544 Arg 1 : int $start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
545 Arg 2 : int $end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
546 Arge [3] : int $strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
547 Example : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
548 Description: Makes another StrainSlice that covers only part of this slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
549 with the appropriate differences to the reference Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
550 If a slice is requested which lies outside of the boundaries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
551 of this function will return undef. This means that
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
552 behaviour will be consistant whether or not the slice is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
553 attached to the database (i.e. if there is attached sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
554 to the slice). Alternatively the expand() method or the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
555 SliceAdaptor::fetch_by_region method can be used instead.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
556 Returntype : Bio::EnsEMBL::StrainSlice or undef if arguments are wrong
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
557 Exceptions : thrown when trying to get the subSlice in the middle of a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
558 insertion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
559 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
560
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
561 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
562
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
563 sub sub_Slice {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
564 my ( $self, $start, $end, $strand ) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
565 my $mapper = $self->mapper();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
566 #finally map from the Slice to the Strain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
567 my @results = $mapper->map_coordinates('StrainSlice',$start,$end,$strand,'StrainSlice');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
568 my $new_start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
569 my $new_end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
570 my $new_strand;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
571 my $new_seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
572
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
573 #Get need start and end for the subSlice of the StrainSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
574 my @results_ordered = sort {$a->start <=> $b->start} @results;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
575 $new_start = $results_ordered[0]->start();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
576 $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
577 $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
578 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
579
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
580 my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
581 $subSlice->{'strain_name'} = $self->{'strain_name'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
582
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
583 my $new_variations; #reference to an array that will contain the variationFeatures in the new subSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
584 #update the VariationFeatures in the sub_Slice of the Strain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
585 my $vf_start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
586 my $vf_end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
587 my $offset = $subSlice->start - $self->start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
588
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
589 foreach my $variationFeature (@{$self->{'alleleFeatures'}}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
590 #calculate the new position of the variation_feature in the subSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
591 $vf_start = $variationFeature->start - $offset;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
592 $vf_end = $variationFeature->end - $offset;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
593 if ($vf_start >= 1 and $vf_end <= $subSlice->length){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
594 #copy the variationFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
595 my %new_vf;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
596 %new_vf = %$variationFeature;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
597 #and shift to the new coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
598 $new_vf{'start'} = $vf_start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
599 $new_vf{'end'} = $vf_end;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
600 my $test = bless \%new_vf, ref($variationFeature);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
601 push @{$new_variations}, $test;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
602 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
603 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
604 $subSlice->{'alleleFeatures'} = $new_variations;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
605 return $subSlice;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
606
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
607 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
608
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
609 =head2 ref_subseq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
610
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
611 Arg [1] : int $startBasePair
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
612 relative to start of slice, which is 1.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
613 Arg [2] : int $endBasePair
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
614 relative to start of slice.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
615 Arg [3] : (optional) int $strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
616 The strand of the slice to obtain sequence from. Default
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
617 value is 1.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
618 Description: returns string of dna from reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
619 Returntype : txt
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
620 Exceptions : end should be at least as big as start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
621 strand must be set
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
622 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
623
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
624 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
625
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
626 sub ref_subseq{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
627 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
628 my $start = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
629 my $end = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
630 my $strand = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
631 # special case for in-between (insert) coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
632 return '' if($start == $end + 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
633
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
634 my $subseq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
635 if($self->adaptor){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
636 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
637 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
638 ( $self, $start,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
639 $end, $strand )};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
640 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
641 ## check for gap at the beginning and pad it with Ns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
642 if ($start < 1) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
643 $subseq = "N" x (1 - $start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
644 $start = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
645 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
646 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
647 ## check for gap at the end and pad it with Ns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
648 if ($end > $self->length()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
649 $subseq .= "N" x ($end - $self->length());
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
650 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
651 reverse_comp(\$subseq) if($strand == -1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
652 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
653 return $subseq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
654 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
655
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
656 =head2 subseq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
657
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
658 Arg [1] : int $startBasePair
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
659 relative to start of slice, which is 1.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
660 Arg [2] : int $endBasePair
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
661 relative to start of slice.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
662 Arg [3] : (optional) int $strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
663 The strand of the slice to obtain sequence from. Default
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
664 value is 1.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
665 Description: returns string of dna sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
666 Returntype : txt
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
667 Exceptions : end should be at least as big as start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
668 strand must be set
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
669 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
670
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
671 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
672
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
673 sub subseq {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
674 my ( $self, $start, $end, $strand ) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
675
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
676 if ( $end+1 < $start ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
677 throw("End coord + 1 is less than start coord");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
678 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
679
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
680 # handle 'between' case for insertions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
681 return '' if( $start == $end + 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
682
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
683 $strand = 1 unless(defined $strand);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
684
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
685 if ( $strand != -1 && $strand != 1 ) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
686 throw("Invalid strand [$strand] in call to Slice::subseq.");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
687 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
688
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
689 my $subseq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
690 my $seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
691 if($self->adaptor){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
692
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
693
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
694 $seq = $self->seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
695 reverse_comp(\$seq) if ($strand == -1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
696 $subseq = substr($seq,$start-1,$end - $start + 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
697 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
698 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
699 ## check for gap at the beginning and pad it with Ns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
700 if ($start < 1) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
701 $subseq = "N" x (1 - $start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
702 $start = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
703 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
704 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
705 ## check for gap at the end and pad it with Ns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
706 if ($end > $self->length()) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
707 $subseq .= "N" x ($end - $self->length());
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
708 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
709 reverse_comp(\$subseq) if($strand == -1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
710 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
711 return $subseq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
712
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
713 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
714
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
715
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
716 sub mapper{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
717 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
718
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
719 if (@_) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
720 delete $self->{'mapper'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
721 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
722 if(!defined $self->{'mapper'}){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
723 #create the mapper between the Slice and StrainSlice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
724 my $mapper = Bio::EnsEMBL::Mapper->new('Slice','StrainSlice');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
725 #align with Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
726 #get all the VariationFeatures in the strain Slice, from start to end in the Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
727 my @variation_features_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
728
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
729 my $start_slice = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
730 my $end_slice;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
731 my $start_strain = 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
732 my $end_strain;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
733 my $length_allele;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
734 foreach my $variation_feature (@variation_features_ordered){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
735 #we have a insertion/deletion: marks the beginning of new slice move coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
736 if ($variation_feature->length_diff != 0){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
737 $length_allele = $variation_feature->length + $variation_feature->length_diff();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
738 $end_slice = $variation_feature->start() - 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
739
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
740 if ($end_slice >= $start_slice){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
741 $end_strain = $end_slice - $start_slice + $start_strain;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
742 #add the sequence that maps
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
743 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'StrainSlice',$start_strain,$end_strain);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
744 #add the indel
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
745 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
746 $start_strain = $end_strain + $length_allele + 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
747 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
748 else{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
749 #add the indel
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
750 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
751 $start_strain += $length_allele;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
752 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
753 $start_slice = $end_slice + $variation_feature->length+ 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
754 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
755 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
756 if ($start_slice <= $self->length){
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
757 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'StrainSlice',$start_strain,$start_strain + $self->length - $start_slice);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
758 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
759 $self->{'mapper'} = $mapper;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
760 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
761 return $self->{'mapper'};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
762 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
763
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
764 =head2 get_all_differences_Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
765
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
766 Description : DEPRECATED use get_all_AlleleFeatures instead
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
767
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
768 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
769
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
770 sub get_all_differences_Slice{
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
771 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
772
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
773 deprecate('Use get_all_differences_Slice instead');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
774 return $self->get_all_AlleleFeatures_Slice(@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
775 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
776
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
777 =head2 get_all_VariationFeatures
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
778
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
779 Arg[1] : int $with_coverage (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
780 Description :returns all alleleFeatures features on this slice.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
781 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
782 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
783 Caller : contigview, snpview
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
784
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
785 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
786
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
787 sub get_all_VariationFeatures {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
788 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
789 my $with_coverage = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
790 $with_coverage ||= 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
791 return $self->get_all_AlleleFeatures_Slice($with_coverage);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
792 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
793
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
794 =head2 get_original_seq_region_position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
795
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
796 Arg [1] : int $position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
797 relative to start of slice, which is 1.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
798 Description: Placeholder method - this method has no explicit use beyond
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
799 providiing compatibility with AlignSlice. To map positions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
800 between the StrainSlice and the reference slice, use the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
801 mapper and its methods.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
802 Returntype : ($strainSlice, $seq_region_position), an array where the first
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
803 element is a Bio::EnsEMBL::StrainSlice and the second one is the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
804 requested seq_region_position.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
805 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
806 Caller : general
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
807
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
808 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
809
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
810 sub get_original_seq_region_position {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
811 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
812 my $position = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
813 #coordinates in the AlignSlice and Slice are the same, so far will return the same Slice
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
814 #and coordinate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
815 return ($self,$position);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
816 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
817
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
818
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
819 =head2 remove_indels
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
820
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
821 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
822 Example : $strainSlice->remove_indels();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
823 Description : Removes insertions and deletions from the allele features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
824 of this object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
825 ReturnType : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
826 Exceptions : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
827 Caller : webteam
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
828
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
829 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
830
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
831 sub remove_indels {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
832 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
833
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
834 my @new_afs = grep { $_->variation->var_class ne 'in-del' } @{$self->{'alleleFeatures'}};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
835
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
836 $self->{'alleleFeatures'} = \@new_afs;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
837 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
838
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
839 1;