annotate variant_effect_predictor/Bio/EnsEMBL/StrainSlice.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::StrainSlice - SubClass of the Slice. Represents the slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 of the genome for a certain strain (applying the variations)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 $sa = $db->get_SliceAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 $strainSlice = $slice->get_by_strain($strain_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 # get the sequence from the Strain Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 my $seq = $strainSlice->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 print $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 # get allele features between this StrainSlice and the reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 my $afs = $strainSlice->get_all_AlleleFeatures_Slice();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 foreach my $af ( @{$afs} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 print "AlleleFeature in position ", $af->start, "-", $af->end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 " in strain with allele ", $af->allele_string, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 # compare a strain against another strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 my $strainSlice_2 = $slice->get_by_strain($strain_name_2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 my $differences =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 $strainSlice->get_all_differences_StrainSlice($strainSlice_2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 foreach my $difference ( @{$differences} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 print "Difference in position ", $difference->start, "-",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 $difference->end(), " in strain with allele ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 $difference->allele_string(), "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 A StrainSlice object represents a region of a genome for a certain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 strain. It can be used to retrieve sequence or features from a strain.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 package Bio::EnsEMBL::StrainSlice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 use Bio::EnsEMBL::Slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 use Bio::EnsEMBL::Mapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 @ISA = qw(Bio::EnsEMBL::Slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Arg[1] : Bio::EnsEMBL::Slice $slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 Arg[2] : string $strain_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Example : $strainSlice = Bio::EnsEMBL::StrainSlice->new(-.... => ,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 -strain_name => $strain_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 Description : Creates a new Bio::EnsEMBL::StrainSlice object that will contain a shallow copy of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 Slice object, plus additional information such as the Strain this Slice refers to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 and listref of Bio::EnsEMBL::Variation::AlleleFeatures of differences with the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 ReturnType : Bio::EnsEMBL::StrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 sub new{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 my $caller = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 my $class = ref($caller) || $caller;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 my ($strain_name) = rearrange(['STRAIN_NAME'],@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 my $self = $class->SUPER::new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 $self->{'strain_name'} = $strain_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 if(!$self->adaptor()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 warning('Cannot get new StrainSlice features without attached adaptor');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 unless($variation_db) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 warning("Variation database must be attached to core database to " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 "retrieve variation information" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 if( $af_adaptor ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 #get the Individual for the given strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 my $ind_adaptor = $variation_db->get_IndividualAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 if ($ind_adaptor){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 my $individual = shift @{$ind_adaptor->fetch_all_by_name($self->{'strain_name'})}; #the name should be unique for a strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 #check that the individua returned isin the database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 if (defined $individual){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 my $allele_features = $af_adaptor->fetch_all_by_Slice($self,$individual);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 #warning("No strain genotype data available for Slice ".$self->name." and Strain ".$individual->name) if ! defined $allele_features->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 my $vf_ids = {}; #hash containing the relation vf_id->af
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 $self->{'_strain'} = $individual;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 map {defined $_->{'_variation_feature_id'} ? $vf_ids->{$_->{'_variation_feature_id'}} = $_ : ''
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 } @{$allele_features};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 # my $new_allele_features = $self->_filter_af_by_coverage($allele_features);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 # $self->{'alleleFeatures'} = $new_allele_features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 $self->{'alleleFeatures'} = $allele_features || [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 $self->{'_vf_ids'} = $vf_ids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 warning("Strain ($self->{strain_name}) not in the database");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 warning("Not possible to retrieve IndividualAdaptor from the variation database");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 warning("Not possible to retrieve VariationFeatureAdaptor from variation database");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 =head2 _filter_af_by_coverage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 Arg [1] : listref to Bio::EnsEMBL::Variation::AlleleFeatures $allele_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 Example : my $new_list_allele_features = $strainSlice->_filter_af_by_coverage($allele_features);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Description : For a list of allele features, gets a new list where they are filter depending on coverage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 Caller : internal function
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 sub _filter_af_by_coverage{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 my $allele_features = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 unless($variation_db) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 warning("Variation database must be attached to core database to " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 "retrieve variation information" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 #this is ugly, but ReadCoverage is always defined in the positive strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 ### passing 1 will only get you the coverage of level 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 ### by omitting the parameter we take into account all coverage regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 my $new_af;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 foreach my $af (@{$allele_features}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 foreach my $rc (@{$rcs}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 if ($af->start <= $rc->end and $af->start >= $rc->start){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 push @{$new_af}, $af;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 return $new_af;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 =head2 strain_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 Arg [1] : (optional) string $strain_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 Example : my $strain_name = $strainSlice->strain_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 Description : Getter/Setter for the name of the strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 ReturnType : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 sub strain_name{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 if (@_){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 $self->{'strain_name'} = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 return $self->{'strain_name'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 =head2 display_Slice_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 Example : my $strain_name = $strainSlice->display_Slice_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 Description : Getter for the name of the strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 ReturnType : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 Caller : webteam
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 sub display_Slice_name{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 return $self->strain_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 =head2 seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 Arg [1] : int $with_coverage (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 Example : print "SEQUENCE = ", $strainSlice->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 Description: Returns the sequence of the region represented by this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 slice formatted as a string in the strain. If flag with_coverage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 is set to 1, returns sequence if there is coverage in the region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 sub seq {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 my $with_coverage = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 $with_coverage ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 # special case for in-between (insert) coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 return '' if($self->start() == $self->end() + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 return $self->{'seq'} if($self->{'seq'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 if($self->adaptor()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 my $reference_sequence = $seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1); #get the reference sequence for that slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 #apply all differences to the reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 #first, in case there are any indels, create the new sequence (containing the '-' bases)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 # sort edits in reverse order to remove complication of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 # adjusting downstream edits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 my @indels_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 foreach my $vf (@indels_ordered){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 #need to find coverage information if diffe
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 # sort edits in reverse order to remove complication of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 # adjusting downstream edits
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 my @variation_features_ordered = sort {$b->start() <=> $a->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 foreach my $vf (@variation_features_ordered){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 $vf->apply_edit($reference_sequence); #change, in the reference sequence, the vf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 #need to find coverage information if different from reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 my $indAdaptor = $self->adaptor->db->get_db_adaptor('variation')->get_IndividualAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 my $ref_strain = $indAdaptor->get_reference_strain_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 $self->_add_coverage_information($reference_sequence) if ($with_coverage == 1 && $self->strain_name ne $ref_strain);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 return substr(${$reference_sequence},0,1) if ($self->length == 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 return substr(${$reference_sequence},0,$self->expanded_length); #returns the reference sequence, applying the variationFeatures. Remove additional bases added due to indels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 # no attached sequence, and no db, so just return Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 return 'N' x $self->length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 sub expanded_length() {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 my $length = $self->SUPER::length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 foreach my $af(@{$self->{'alleleFeatures'}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 $length += $af->length_diff() if $af->length_diff > 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 return $length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 sub _add_coverage_information{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 my $reference_sequence = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 unless($variation_db) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 warning("Variation database must be attached to core database to " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 "retrieve variation information" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 my $rc_adaptor = $variation_db->get_ReadCoverageAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 ### EK : - it looks like the arguments to fetch_all_by_Slice_Sample_depth have changed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 ### passing 1 will only get you the coverage of level 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 ### by omitting the parameter we take into account all coverage regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 # my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'},1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 my $rcs = $rc_adaptor->fetch_all_by_Slice_Sample_depth($self,$self->{'_strain'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 my $rcs_sorted;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 @{$rcs_sorted} = sort {$a->start <=> $b->start} @{$rcs} if ($self->strand == -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 $rcs = $rcs_sorted if ($self->strand == -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 my $start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 # wm2 - new code to mask sequence, instead starts with masked string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 # and unmasks seq where there is read coverage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 # get all length-changing vars
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 my @indels_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alignIndels'}} if (defined $self->{'alignIndels'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 my $masked_seq = '~' x length($$reference_sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 foreach my $rc(@{$rcs}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 my ($start, $end) = ($rc->start, $rc->end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 # adjust region for indels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 foreach my $indel(@indels_ordered) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 next if $rc->start > $end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 # if within RC region, only need adjust the end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $start += $indel->length_diff unless $indel->start > $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 $end += $indel->length_diff;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 # adjust coords for seq boundaries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 $start = 1 if $start < 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 $end = CORE::length($masked_seq) if $end > CORE::length($masked_seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 # now unmask the sequence using $$reference_sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 substr($masked_seq, $start - 1, $end - $start + 1) = substr($$reference_sequence, $start - 1, $end - $start + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 # wm2 - old code, starts with sequence and masks regions between read coverage - BUGGY
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 # foreach my $rc (@{$rcs}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 # $rc->start(1) if ($rc->start < 0); #if the region lies outside the boundaries of the slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 # $rc->end($self->end - $self->start + 1) if ($rc->end + $self->start > $self->end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 # warn "Adjusted: ", $rc->start, "-", $rc->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 # warn "Covering from ", $start, " over ", ($rc->start - $start - 1), " bases";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 # substr($$reference_sequence, $start-1,($rc->start - $start - 1),'~' x ($rc->start - $start - 1)) if ($rc->start - 1 > $start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 # $start = $rc->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 # substr($$reference_sequence, $start, ($self->length - $start) ,'~' x ($self->length - $start)) if ($self->length -1 > $start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 # copy the masked sequence to the reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 $$reference_sequence = $masked_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 =head2 get_AlleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 Arg[1] : Bio::EnsEMBL::Variation::VariationFeature $vf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 Example : my $af = $strainSlice->get_AlleleFeature($vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 Description : Returns the AlleleFeature object associated with the VariationFeature (if any)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 ReturnType : Bio::EnsEMBL::Variation::AlleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 sub get_AlleleFeature{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 my $vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 my $af;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 #look at the hash containing the relation vf_id->alleleFeature, if present, return object, otherwise, undef
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 $af = $self->{'_vf_ids'}->{$vf->dbID} if (defined $self->{'_vf_ids'}->{$vf->dbID});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 return $af;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 =head2 get_all_AlleleFeatures_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 Arg[1] : int $with_coverage (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 Example : my $af = $strainSlice->get_all_AlleleFeatures_Slice()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 Description : Gets all AlleleFeatures between the StrainSlice object and the Slice is defined.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 If argument $with_coverage set to 1, returns only AF if they have coverage information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 sub get_all_AlleleFeatures_Slice{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 my $with_coverage = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 my $variation_db = $self->adaptor->db->get_db_adaptor('variation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 unless($variation_db) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 warning("Variation database must be attached to core database to " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 "retrieve variation information" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 my $indAdaptor = $variation_db->get_IndividualAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 my $ref_name = $indAdaptor->get_reference_strain_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 return [] if ($self->strain_name eq $ref_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 $with_coverage ||= 0; #by default, get all AlleleFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 if ($with_coverage == 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 my $new_allele_features = $self->_filter_af_by_coverage($self->{'alleleFeatures'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 return $new_allele_features || [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 return $self->{'alleleFeatures'} || [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 =head2 get_all_differences_StrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 Arg[1] : Bio::EnsEMBL::StrainSlice $ss
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 Example : my $differences = $strainSlice->get_all_differences_StrainSlice($ss)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 Description : Gets differences between 2 StrainSlice objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 Exceptions : thrown on bad argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 sub get_all_differences_StrainSlice{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 my $strainSlice = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 if (!ref($strainSlice) || !$strainSlice->isa('Bio::EnsEMBL::StrainSlice')){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 throw('Bio::EnsEMBL::StrainSlice arg expected');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 if ( @{$self->{'alleleFeatures'}} == 0 && @{$strainSlice->{'alleleFeatures'}} == 0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 return undef; #there are no differences in any of the Strains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 my $differences; #differences between strains
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 if (@{$strainSlice->{'alleleFeatures'}} == 0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 #need to create a copy of VariationFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 foreach my $difference (@{$self->{'alleleFeatures'}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 my %vf = %$difference;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 push @{$differences},bless \%vf,ref($difference);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 elsif (@{$self->{'alleleFeatures'}} == 0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 #need to create a copy of VariationFeature, but changing the allele by the allele in the reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 push @{$differences}, $strainSlice->_convert_difference($difference);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 #both strains have differences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 #create a hash with the differences in the self strain slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 my %variation_features_self = map {$_->start => $_} @{$self->{'alleleFeatures'}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 foreach my $difference (@{$strainSlice->{'alleleFeatures'}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 #there is no difference in the other strain slice, convert the allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 if (!defined $variation_features_self{$difference->start}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 push @{$differences},$strainSlice->_convert_difference($difference);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 #if it is defined and have the same allele, delete from the hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 if ($variation_features_self{$difference->start}->allele_string eq $difference->allele_string){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 delete $variation_features_self{$difference->start};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 #and copy the differences that in the self
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 foreach my $difference (values %variation_features_self){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 my %vf = %$difference;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 push @{$differences},bless \%vf,ref($difference);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 #need to map differences to the self
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 my $mapper = $self->mapper(); #now that we have the differences, map them in the StrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 # print Dumper($mapper);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 my @results;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 foreach my $difference (@{$differences}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 @results = $mapper->map_coordinates('Slice',$difference->start,$difference->end,$difference->strand,'Slice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 #we can have 3 possibilities:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 #the difference is an insertion and when mapping returns the boundaries of the insertion in the StrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 if (@results == 2){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 #the first position in the result is the beginning of the insertion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 if($results[0]->start < $results[1]->start){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 $difference->start($results[0]->end+1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 $difference->end($results[1]->start-1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 $difference->start($results[1]->end+1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 $difference->end($results[0]->start-1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 $difference->strand($results[0]->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 #it can be either a SNP or a deletion, and we have the coordinates in the result, etither a Bio::EnsEMBL::Mapper::Coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 # or a Bio::EnsEMBL::Mapper::IndelCoordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 # print "Difference: ",$difference->start, "-", $difference->end,"strand ",$difference->strand,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 $difference->start($results[0]->start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 $difference->end($results[0]->end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 $difference->strand($results[0]->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 return $differences;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 #for a given VariationFeatures, converts the allele into the reference allele and returns a new list with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 #the converted VariationFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 sub _convert_difference{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 my $difference = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 my %new_vf = %$difference; #make a copy of the variationFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 #and change the allele with the one from the reference Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 $new_vf{'allele_string'} = $self->SUPER::subseq($difference->start,$difference->end,$difference->strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 return bless \%new_vf,ref($difference);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 =head2 sub_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 Arg 1 : int $start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 Arg 2 : int $end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 Arge [3] : int $strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 Example : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 Description: Makes another StrainSlice that covers only part of this slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 with the appropriate differences to the reference Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 If a slice is requested which lies outside of the boundaries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 of this function will return undef. This means that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 behaviour will be consistant whether or not the slice is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 attached to the database (i.e. if there is attached sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 to the slice). Alternatively the expand() method or the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 SliceAdaptor::fetch_by_region method can be used instead.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 Returntype : Bio::EnsEMBL::StrainSlice or undef if arguments are wrong
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 Exceptions : thrown when trying to get the subSlice in the middle of a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 insertion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 sub sub_Slice {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 my ( $self, $start, $end, $strand ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 my $mapper = $self->mapper();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 #finally map from the Slice to the Strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 my @results = $mapper->map_coordinates('StrainSlice',$start,$end,$strand,'StrainSlice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 my $new_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 my $new_end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 my $new_strand;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 my $new_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 #Get need start and end for the subSlice of the StrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 my @results_ordered = sort {$a->start <=> $b->start} @results;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 $new_start = $results_ordered[0]->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 $new_strand = $results_ordered[0]->strand() if (ref($results_ordered[0]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 $new_strand = $results_ordered[-1]->strand() if (ref($results_ordered[-1]) eq 'Bio::EnsEMBL::Mapper::Coordinate');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 $new_end = $results_ordered[-1]->end(); #get last element of the array, the end of the slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 my $subSlice = $self->SUPER::sub_Slice($new_start,$new_end,$new_strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 $subSlice->{'strain_name'} = $self->{'strain_name'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 my $new_variations; #reference to an array that will contain the variationFeatures in the new subSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 #update the VariationFeatures in the sub_Slice of the Strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 my $vf_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 my $vf_end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 my $offset = $subSlice->start - $self->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 foreach my $variationFeature (@{$self->{'alleleFeatures'}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 #calculate the new position of the variation_feature in the subSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 $vf_start = $variationFeature->start - $offset;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 $vf_end = $variationFeature->end - $offset;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 if ($vf_start >= 1 and $vf_end <= $subSlice->length){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 #copy the variationFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 my %new_vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 %new_vf = %$variationFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 #and shift to the new coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 $new_vf{'start'} = $vf_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 $new_vf{'end'} = $vf_end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600 my $test = bless \%new_vf, ref($variationFeature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 push @{$new_variations}, $test;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 $subSlice->{'alleleFeatures'} = $new_variations;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 return $subSlice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 =head2 ref_subseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 Arg [1] : int $startBasePair
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 relative to start of slice, which is 1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 Arg [2] : int $endBasePair
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 relative to start of slice.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 Arg [3] : (optional) int $strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 The strand of the slice to obtain sequence from. Default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 value is 1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 Description: returns string of dna from reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 Returntype : txt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 Exceptions : end should be at least as big as start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 strand must be set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 sub ref_subseq{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 my $start = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 my $end = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 my $strand = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 # special case for in-between (insert) coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 return '' if($start == $end + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 my $subseq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 if($self->adaptor){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 ( $self, $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 $end, $strand )};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 ## check for gap at the beginning and pad it with Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 if ($start < 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 $subseq = "N" x (1 - $start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 $start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 ## check for gap at the end and pad it with Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 if ($end > $self->length()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 $subseq .= "N" x ($end - $self->length());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 reverse_comp(\$subseq) if($strand == -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653 return $subseq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 =head2 subseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 Arg [1] : int $startBasePair
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 relative to start of slice, which is 1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 Arg [2] : int $endBasePair
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 relative to start of slice.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 Arg [3] : (optional) int $strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 The strand of the slice to obtain sequence from. Default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 value is 1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 Description: returns string of dna sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 Returntype : txt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 Exceptions : end should be at least as big as start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 strand must be set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 sub subseq {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 my ( $self, $start, $end, $strand ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 if ( $end+1 < $start ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 throw("End coord + 1 is less than start coord");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 # handle 'between' case for insertions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 return '' if( $start == $end + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 $strand = 1 unless(defined $strand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 if ( $strand != -1 && $strand != 1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 throw("Invalid strand [$strand] in call to Slice::subseq.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 my $subseq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 my $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 if($self->adaptor){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 $seq = $self->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 reverse_comp(\$seq) if ($strand == -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 $subseq = substr($seq,$start-1,$end - $start + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 ## check for gap at the beginning and pad it with Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 if ($start < 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 $subseq = "N" x (1 - $start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 $start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705 ## check for gap at the end and pad it with Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 if ($end > $self->length()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 $subseq .= "N" x ($end - $self->length());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 reverse_comp(\$subseq) if($strand == -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711 return $subseq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 sub mapper{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 if (@_) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 delete $self->{'mapper'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 if(!defined $self->{'mapper'}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 #create the mapper between the Slice and StrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 my $mapper = Bio::EnsEMBL::Mapper->new('Slice','StrainSlice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 #align with Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 #get all the VariationFeatures in the strain Slice, from start to end in the Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 my @variation_features_ordered = sort {$a->start() <=> $b->start()} @{$self->{'alleleFeatures'}} if (defined $self->{'alleleFeatures'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 my $start_slice = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 my $end_slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 my $start_strain = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 my $end_strain;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 my $length_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 foreach my $variation_feature (@variation_features_ordered){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 #we have a insertion/deletion: marks the beginning of new slice move coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 if ($variation_feature->length_diff != 0){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 $length_allele = $variation_feature->length + $variation_feature->length_diff();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 $end_slice = $variation_feature->start() - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 if ($end_slice >= $start_slice){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 $end_strain = $end_slice - $start_slice + $start_strain;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 #add the sequence that maps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 $mapper->add_map_coordinates('Slice',$start_slice,$end_slice,1,'StrainSlice',$start_strain,$end_strain);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 #add the indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 $start_strain = $end_strain + $length_allele + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 #add the indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 $mapper->add_indel_coordinates('Slice',$end_slice+1,$end_slice + $variation_feature->length,1,'StrainSlice',$end_strain+1,$end_strain + $length_allele);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 $start_strain += $length_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 $start_slice = $end_slice + $variation_feature->length+ 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 if ($start_slice <= $self->length){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 $mapper->add_map_coordinates('Slice',$start_slice,$self->length,1,'StrainSlice',$start_strain,$start_strain + $self->length - $start_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 $self->{'mapper'} = $mapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 return $self->{'mapper'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 =head2 get_all_differences_Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 Description : DEPRECATED use get_all_AlleleFeatures instead
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 sub get_all_differences_Slice{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 deprecate('Use get_all_differences_Slice instead');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 return $self->get_all_AlleleFeatures_Slice(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 =head2 get_all_VariationFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 Arg[1] : int $with_coverage (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780 Description :returns all alleleFeatures features on this slice.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 ReturnType : listref of Bio::EnsEMBL::Variation::AlleleFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 Caller : contigview, snpview
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 sub get_all_VariationFeatures {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 my $with_coverage = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 $with_coverage ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 return $self->get_all_AlleleFeatures_Slice($with_coverage);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 =head2 get_original_seq_region_position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 Arg [1] : int $position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 relative to start of slice, which is 1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 Description: Placeholder method - this method has no explicit use beyond
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 providiing compatibility with AlignSlice. To map positions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 between the StrainSlice and the reference slice, use the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 mapper and its methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 Returntype : ($strainSlice, $seq_region_position), an array where the first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803 element is a Bio::EnsEMBL::StrainSlice and the second one is the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 requested seq_region_position.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 sub get_original_seq_region_position {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 my $position = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 #coordinates in the AlignSlice and Slice are the same, so far will return the same Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 #and coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 return ($self,$position);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819 =head2 remove_indels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 Example : $strainSlice->remove_indels();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 Description : Removes insertions and deletions from the allele features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 of this object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 ReturnType : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 Caller : webteam
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 sub remove_indels {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 my @new_afs = grep { $_->variation->var_class ne 'in-del' } @{$self->{'alleleFeatures'}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 $self->{'alleleFeatures'} = \@new_afs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 1;