annotate variant_effect_predictor/Bio/EnsEMBL/DBSQL/StrainSliceAdaptor.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2009 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor - adaptor/factory for MappedSlices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 representing alternative assemblies
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 my $slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 my $msc = Bio::EnsEMBL::MappedSliceContainer->new(-SLICE => $slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # create a new strain slice adaptor and attach it to the MSC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 my $ssa = Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new($sa->db);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 $msc->set_StrainSliceAdaptor($ssa);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 # now attach strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 $msc->attach_StrainSlice('Watson');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 NOTE: this code is under development and not fully functional nor tested
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 yet. Use only for development.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 This adaptor is a factory for creating MappedSlices representing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 strains and attaching them to a MappedSliceContainer. A mapper will be created
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 to map between the reference slice and the common container slice coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 system.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 fetch_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 =head1 REALTED MODULES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 Bio::EnsEMBL::MappedSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 Bio::EnsEMBL::MappedSliceContainer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 Bio::EnsEMBL::AlignStrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 Bio::EnsEMBL::StrainSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 package Bio::EnsEMBL::DBSQL::StrainSliceAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 use Bio::EnsEMBL::MappedSlice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 use Bio::EnsEMBL::Mapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 our @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Example : my $strain_slice_adaptor =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 Bio::EnsEMBL::DBSQL::StrainSliceAdaptor->new;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Description : Constructor.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 Return type : Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 my $caller = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 my $class = ref($caller) || $caller;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 my $self = $class->SUPER::new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 =head2 fetch_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 Arg[1] : Bio::EnsEMBL::MappedSliceContainer $container - the container
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 to attach MappedSlices to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 Arg[2] : String $name - the name of the strain to fetch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 Example : my ($mapped_slice) = @{ $msc->fetch_by_name('Watson') };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 Description : Creates a MappedSlice representing a version of the container's
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 reference slice with variant alleles from the named strain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 Return type : listref of Bio::EnsEMBL::MappedSlice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Exceptions : thrown on wrong or missing arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 Caller : general, Bio::EnsEMBL::MappedSliceContainer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 Status : At Risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 : under development
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 sub fetch_by_name {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 my $container = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 my $name = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 # argueent check
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 unless ($container and ref($container) and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 throw("Need a MappedSliceContainer.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 unless ($name) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 throw("Need a strain name.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 my $slice = $container->ref_slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 # get a connection to the variation DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 my $variation_db = $self->db->get_db_adaptor('variation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 unless($variation_db) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 warning("Variation database must be attached to core database to retrieve variation information");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 # now get an allele feature adaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 my $af_adaptor = $variation_db->get_AlleleFeatureAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 # check we got it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 unless(defined $af_adaptor) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 warning("Not possible to retrieve AlleleFeatureAdaptor from variation database");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 # now get an individual adaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 my $ind_adaptor = $variation_db->get_IndividualAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 # check we got it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 unless(defined $ind_adaptor) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 warning("Not possible to retrieve IndividualAdaptor from variation database");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 # fetch individual object for this strain name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 my $ind = shift @{$ind_adaptor->fetch_all_by_name($name)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 # check we got a result
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 unless(defined $ind) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 warn("Strain ".$name." not found in the database");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 return '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 ## MAP STRAIN SLICE TO REF SLICE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 ################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 # create a mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 my $mapper = Bio::EnsEMBL::Mapper->new('mapped_slice', 'ref_slice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 # create a mapped_slice object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 -ADAPTOR => $self,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 -CONTAINER => $container,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 -NAME => $slice->name."\#strain_$name",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 # get the strain slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 my $strain_slice = $slice->get_by_strain($ind->name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 # get all allele features for this slice and individual
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 #my @afs = sort {$a->start() <=> $b->start()} @{$af_adaptor->fetch_all_by_Slice($slice, $ind)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 # get allele features with coverage info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 my $afs = $strain_slice->get_all_AlleleFeatures_Slice(1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 # check we got some data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 #warning("No strain genotype data available for slice ".$slice->name." and strain ".$ind->name) if ! defined $afs[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 my $start_slice = $slice->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 my $start_strain = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 my $sr_name = $slice->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 #my $sr_name = 'ref_slice';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 my ($end_slice, $end_strain, $allele_length);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 my $indel_flag = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 my $total_length_diff = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 # check for AFs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 if(defined($afs) && scalar @$afs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 # go through each AF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 foreach my $af(@$afs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 # find out if it changes the length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 if($af->length_diff != 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 $indel_flag = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 $total_length_diff += $af->length_diff;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 # get the allele length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 $allele_length = $af->length + $af->length_diff();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 $end_slice = $slice->start + $af->start() - 2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 if ($end_slice >= $start_slice){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 $end_strain = $end_slice - $start_slice + $start_strain;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 #add the sequence that maps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 $mapper->add_map_coordinates('mapped_slice', $start_strain, $end_strain, 1, $sr_name, $start_slice, $end_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 #add the indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1, $end_strain+$allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 $start_strain = $end_strain + $allele_length + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 #add the indel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 $mapper->add_indel_coordinates('mapped_slice', $end_strain+1,$end_strain + $allele_length, 1, $sr_name,$end_slice+1,$end_slice + $af->length);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 $start_strain += $allele_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 $start_slice = $end_slice + $af->length+ 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 # add the remaining coordinates (or the whole length if no indels found)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 $mapper->add_map_coordinates('mapped_slice', $start_strain, $start_strain + ($slice->end - $start_slice), 1, $sr_name, $start_slice, $slice->end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 # add the slice/mapper pair
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 $mapped_slice->add_Slice_Mapper_pair($strain_slice, $mapper);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 ## MAP REF_SLICE TO CONTAINER SLICE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 ###################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 if($total_length_diff > 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 # create a new mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 my $new_mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 # get existing pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 my @existing_pairs = $container->mapper->list_pairs('container', 1, $container->container_slice->length, 'container');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 my @new_pairs = $mapper->list_pairs('mapped_slice', 1, $strain_slice->length(), 'mapped_slice');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 # we need a list of indels (specifically inserts)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 my @indels;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 # go through existing first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 foreach my $pair(@existing_pairs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 my $indel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 $indel->{'length_diff'} = ($pair->to->end - $pair->to->start) - ($pair->from->end - $pair->from->start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 # we're only interested in inserts here, not deletions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 next unless $indel->{'length_diff'} > 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 $indel->{'ref_start'} = $pair->from->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 $indel->{'ref_end'} = $pair->from->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 $indel->{'length'} = $pair->to->end - $pair->to->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 push @indels, $indel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 # now new ones
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 foreach my $pair(@new_pairs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 if($pair->from->end - $pair->from->start != $pair->to->end - $pair->to->start) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 my $indel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 $indel->{'length_diff'} = ($pair->from->end - $pair->from->start) - ($pair->to->end - $pair->to->start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 # we're only interested in inserts here, not deletions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 next unless $indel->{'length_diff'} > 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 $indel->{'ref_start'} = $pair->to->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 $indel->{'ref_end'} = $pair->to->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 $indel->{'length'} = $pair->from->end - $pair->from->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 push @indels, $indel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 # sort them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 @indels = sort {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 $a->{'ref_start'} <=> $b->{'ref_start'} || # by position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 $b->{'length_diff'} <=> $a->{'length_diff'} # then by length diff so we only keep the longest
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 } @indels;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 # clean them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 my @new_indels = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 my $p = $indels[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 push @new_indels, $indels[0] if scalar @indels;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 for my $i(1..$#indels) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 my $c = $indels[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 if($c->{'ref_start'} != $p->{'ref_start'} && $c->{'ref_end'} != $p->{'ref_end'}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 push @new_indels, $c;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 $p = $c;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 $start_slice = $slice->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 $start_strain = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 $sr_name = $slice->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 #$sr_name = 'ref_slice';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 foreach my $indel(@new_indels) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 $end_slice = $indel->{'ref_end'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 $end_strain = $start_strain + ($end_slice - $start_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 $allele_length = $indel->{'length'} + $indel->{'length_diff'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 $new_mapper->add_map_coordinates($sr_name, $start_slice, $end_slice, 1, 'container', $start_strain, $end_strain);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 $new_mapper->add_indel_coordinates($sr_name,$end_slice+1,$end_slice + $indel->{'length'}, 1, 'container', $end_strain+1, $end_strain+$allele_length);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 $start_strain = $end_strain + $allele_length + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 $start_slice = $end_slice + $indel->{'length'} + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 $new_mapper->add_map_coordinates($sr_name, $start_slice, $slice->end, 1, 'container', $start_strain, $start_strain + ($slice->end - $start_slice));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 # replace the mapper with the new mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $container->mapper($new_mapper);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 # change the container slice's length according to length diff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 $container->container_slice($container->container_slice->expand(undef, $total_length_diff, 1));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 return [$mapped_slice];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361