annotate variant_effect_predictor/Bio/EnsEMBL/DBSQL/SequenceAdaptor.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::DBSQL::SequenceAdaptor - produce sequence strings from locations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 my $sa = $registry->get_adaptor( 'Human', 'Core', 'Sequence' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 my $dna =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 ${ $sa->fetch_by_Slice_start_end_strand( $slice, 1, 1000, -1 ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 An adaptor for the retrieval of DNA sequence from the EnsEMBL database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 package Bio::EnsEMBL::DBSQL::SequenceAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 use vars qw(@ISA @EXPORT);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 use Bio::EnsEMBL::DBSQL::BaseAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 use Bio::EnsEMBL::Utils::Cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 our $SEQ_CHUNK_PWR = 18; # 2^18 = approx. 250KB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 our $SEQ_CACHE_SZ = 5;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 our $SEQ_CACHE_MAX = (2 ** $SEQ_CHUNK_PWR) * $SEQ_CACHE_SZ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 @EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 Arg [1] : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 Example : my $sa = $db_adaptor->get_SequenceAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 Description: Constructor. Calls superclass constructor and initialises
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 internal cache structure.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Returntype : Bio::EnsEMBL::DBSQL::SequenceAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 Caller : DBAdaptor::get_SequenceAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 my $caller = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 my $class = ref($caller) || $caller;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 my $self = $class->SUPER::new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 # use an LRU cache to limit the size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 my %seq_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 tie(%seq_cache, 'Bio::EnsEMBL::Utils::Cache', $SEQ_CACHE_SZ);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 $self->{'seq_cache'} = \%seq_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 # See if this has any seq_region_attrib of type "_rna_edit_cache" if so store these
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 # in a hash.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 my $sth = $self->dbc->prepare('select sra.seq_region_id, sra.value from seq_region_attrib sra, attrib_type at where sra.attrib_type_id = at.attrib_type_id and code like "_rna_edit"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 $sth->execute();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 my ($seq_region_id, $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 $sth->bind_columns(\$seq_region_id, \$value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 my %edits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 my $count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 while($sth->fetch()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 $count++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 push @{$edits{$seq_region_id}}, $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 $sth->finish;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 if($count){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 $self->{_rna_edits_cache} = \%edits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 =head2 clear_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 Example : $sa->clear_cache();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 Description : Removes all entries from the associcated sequence cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 sub clear_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 %{$self->{seq_cache}} = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 =head2 fetch_by_Slice_start_end_strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 Arg [1] : Bio::EnsEMBL::Slice slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 The slice from which you want the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 Arg [2] : (optional) int startBasePair
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 The start base pair relative to the start of the slice. Negative
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 values or values greater than the length of the slice are fine.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 default = 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 Arg [3] : (optional) int endBasePair
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 The end base pair relative to the start of the slice. Negative
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 values or values greater than the length of the slice are fine,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 but the end must be greater than or equal to the start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 count from 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 default = the length of the slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 Arg [4] : (optional) int strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 1, -1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 default = 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 Example : $dna = $seq_adptr->fetch_by_Slice_start_end_strand($slice, 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 1000, -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 Description: retrieves from db the sequence for this slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 uses AssemblyMapper to find the assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 Exceptions : endBasePair should be less or equal to length of slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 Caller : Bio::EnsEMBL::Slice::seq(), Slice::subseq()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 sub fetch_by_Slice_start_end_strand {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 my ( $self, $slice, $start, $end, $strand ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 if(!ref($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 throw("Slice argument is required.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 $start = 1 if(!defined($start));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 if ( ( !defined($end) || $start > $end || $start < 0 || $end < 0 || $slice->start> $slice->end ) && $slice->is_circular ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 if ( !defined($end) || ($start > $end ) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 return $self->_fetch_by_Slice_start_end_strand_circular( $slice, $start, $end, $strand );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 if ( defined($end) && ($end < 0) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 $end += $slice->seq_region_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 if ($start < 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 $start += $slice->seq_region_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 if($slice->start> $slice->end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 return $self->_fetch_by_Slice_start_end_strand_circular( $slice, $slice->start, $slice->end, $strand );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 if ( ( !defined($end) ) && (not $slice->is_circular) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 $end = $slice->end() - $slice->start() + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 if ( $start > $end ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 throw("Start must be less than or equal to end.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 $strand ||= 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 #get a new slice that spans the exact region to retrieve dna from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 my $right_expand = $end - $slice->length(); #negative is fine
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 my $left_expand = 1 - $start; #negative is fine
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 if($right_expand || $left_expand) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 $slice = $slice->expand($left_expand, $right_expand);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 #retrieve normalized 'non-symlinked' slices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 #this allows us to support haplotypes and PARs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 my $slice_adaptor = $slice->adaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 my @symproj=@{$slice_adaptor->fetch_normalized_slice_projection($slice)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 if(@symproj == 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 throw('Could not retrieve normalized Slices. Database contains ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 'incorrect assembly_exception information.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 #call this method again with any slices that were 'symlinked' to by this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 #slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 if(@symproj != 1 || $symproj[0]->[2] != $slice) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 my $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 foreach my $segment (@symproj) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 my $symlink_slice = $segment->[2];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 #get sequence from each symlinked area
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 $seq .= ${$self->fetch_by_Slice_start_end_strand($symlink_slice,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 1,undef,1)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 if($strand == -1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 reverse_comp(\$seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 return \$seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 # we need to project this slice onto the sequence coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 # even if the slice is in the same coord system, we want to trim out
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 # flanking gaps (if the slice is past the edges of the seqregion)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 my $csa = $self->db->get_CoordSystemAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 my $seqlevel = $csa->fetch_sequence_level();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 my @projection=@{$slice->project($seqlevel->name(), $seqlevel->version())};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 my $seq = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 my $total = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 my $tmp_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 #fetch sequence from each of the sequence regions projected onto
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 foreach my $segment (@projection) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 my ($start, $end, $seq_slice) = @$segment;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 #check for gaps between segments and pad them with Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 my $gap = $start - $total - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 if($gap) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 $seq .= 'N' x $gap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 my $seq_region_id = $slice_adaptor->get_seq_region_id($seq_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 $tmp_seq = ${$self->_fetch_seq($seq_region_id,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 $seq_slice->start, $seq_slice->length())};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 #reverse compliment on negatively oriented slices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 if($seq_slice->strand == -1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 reverse_comp(\$tmp_seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 $seq .= $tmp_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $total = $end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 #check for any remaining gaps at the end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 my $gap = $slice->length - $total;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 if($gap) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 $seq .= 'N' x $gap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 #if the sequence is too short it is because we came in with a seqlevel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 #slice that was partially off of the seq_region. Pad the end with Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 #to make long enough
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 if(length($seq) != $slice->length()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 $seq .= 'N' x ($slice->length() - length($seq));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 if(defined($self->{_rna_edits_cache}) and defined($self->{_rna_edits_cache}->{$slice->get_seq_region_id})){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 $self->_rna_edit($slice,\$seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 #if they asked for the negative slice strand revcomp the whole thing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 reverse_comp(\$seq) if($strand == -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 return \$seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 sub _fetch_by_Slice_start_end_strand_circular {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 my ( $self, $slice, $start, $end, $strand ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 assert_ref( $slice, 'Bio::EnsEMBL::Slice' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 $strand ||= 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 if ( !defined($start) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 $start ||= 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 if ( !defined($end) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 $end = $slice->end() - $slice->start() + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 if ( $start > $end && $slice->is_circular() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 my ($seq, $seq1, $seq2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 my $midpoint = $slice->seq_region_length - $slice->start + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 $seq1 = ${ $self->_fetch_by_Slice_start_end_strand_circular( $slice, 1, $midpoint, 1 )};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 $seq2 = ${ $self->_fetch_by_Slice_start_end_strand_circular( $slice, $midpoint + 1, $slice->length(), 1 )};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 $seq = $slice->strand > 0 ? "$seq1$seq2" : "$seq2$seq1";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 reverse_comp( \$seq ) if ( $strand == -1 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 return \$seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 # Get a new slice that spans the exact region to retrieve dna from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 my $right_expand = $end - $slice->length(); #negative is fine
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 my $left_expand = 1 - $start; #negative is fine
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 if ( $right_expand || $left_expand ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 $slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 $slice->strand > 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 ? $slice->expand( $left_expand, $right_expand )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 : $slice->expand( $right_expand, $left_expand );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 # Retrieve normalized 'non-symlinked' slices. This allows us to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 # support haplotypes and PARs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 my $slice_adaptor = $slice->adaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 my @symproj =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 @{ $slice_adaptor->fetch_normalized_slice_projection($slice) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 if ( @symproj == 0 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 throw( 'Could not retrieve normalized Slices. Database contains '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 . 'incorrect assembly_exception information.' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 # Call this method again with any slices that were 'symlinked' to by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 # this slice.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 if ( @symproj != 1 || $symproj[0]->[2] != $slice ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 my $seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 foreach my $segment (@symproj) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 my $symlink_slice = $segment->[2];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 # Get sequence from each symlinked area.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 $seq .= ${
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $self->fetch_by_Slice_start_end_strand( $symlink_slice, 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 undef, 1 ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 if ( $strand == -1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 reverse_comp( \$seq );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 return \$seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 # We need to project this slice onto the sequence coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 # even if the slice is in the same coord system, we want to trim out
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 # flanking gaps (if the slice is past the edges of the seqregion).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 my $csa = $self->db->get_CoordSystemAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 my $seqlevel = $csa->fetch_sequence_level();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 my @projection =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 @{ $slice->project( $seqlevel->name(), $seqlevel->version() ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 my $seq = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 my $total = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 my $tmp_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 # Fetch sequence from each of the sequence regions projected onto.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 foreach my $segment (@projection) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 my ( $start, $end, $seq_slice ) = @{$segment};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 # Check for gaps between segments and pad them with Ns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 my $gap = $start - $total - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 if ($gap) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 $seq .= 'N' x $gap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 my $seq_region_id = $slice_adaptor->get_seq_region_id($seq_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 $tmp_seq = ${
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 $self->_fetch_seq( $seq_region_id, $seq_slice->start(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 $seq_slice->length() ) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 # Reverse compliment on negatively oriented slices.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 if ( $seq_slice->strand == -1 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 reverse_comp( \$tmp_seq );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 $seq .= $tmp_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 $total = $end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 # Check for any remaining gaps at the end.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 my $gap = $slice->length() - $total;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 if ($gap) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 $seq .= 'N' x $gap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 # If the sequence is too short it is because we came in with a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 # seqlevel slice that was partially off of the seq_region. Pad the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 # end with Ns to make long enough
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 if ( length($seq) != $slice->length() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 $seq .= 'N' x ( $slice->length() - length($seq) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 if ( defined( $self->{_rna_edits_cache} )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 && defined(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 $self->{_rna_edits_cache}->{ $slice->get_seq_region_id } ) )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 $self->_rna_edit( $slice, \$seq );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 return \$seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 } ## end sub _fetch_by_Slice_start_end_strand_circular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 sub _rna_edit {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 my $slice = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 my $seq = shift; #reference to string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 my $s_start = $slice->start; #substr start at 0 , but seq starts at 1 (so no -1 here)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 my $s_end = $s_start+length($$seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 foreach my $edit (@{$self->{_rna_edits_cache}->{$slice->get_seq_region_id}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 my ($start, $end, $txt) = split (/\s+/, $edit);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 # check that RNA edit is not outside the requested region : happens quite often with LRG regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 next if ($end < $s_start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 next if ($s_end < $start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 substr($$seq,$start-$s_start, ($end-$start)+1, $txt);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 sub _fetch_seq {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 my $seq_region_id = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 my $start = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 my $length = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 my $cache = $self->{'seq_cache'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 if($length < $SEQ_CACHE_MAX) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 my $chunk_min = ($start-1) >> $SEQ_CHUNK_PWR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 my $chunk_max = ($start + $length - 1) >> $SEQ_CHUNK_PWR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 # piece together sequence from cached component parts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 my $entire_seq = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 for(my $i = $chunk_min; $i <= $chunk_max; $i++) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 if($cache->{"$seq_region_id:$i"}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 $entire_seq .= $cache->{"$seq_region_id:$i"};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 # retrieve uncached portions of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 my $sth =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 $self->prepare( "SELECT SUBSTRING(d.sequence, ?, ?) "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 . "FROM dna d "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 . "WHERE d.seq_region_id = ?" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 my $tmp_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 my $min = ($i << $SEQ_CHUNK_PWR) + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 $sth->bind_param( 1, $min, SQL_INTEGER );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 $sth->bind_param( 2, 1 << $SEQ_CHUNK_PWR, SQL_INTEGER );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 $sth->bind_param( 3, $seq_region_id, SQL_INTEGER );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 $sth->execute();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 $sth->bind_columns(\$tmp_seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 $sth->fetch();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 $sth->finish();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 # always give back uppercased sequence so it can be properly softmasked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 $entire_seq .= uc($tmp_seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 $cache->{"$seq_region_id:$i"} = uc($tmp_seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 # return only the requested portion of the entire sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 my $min = ( $chunk_min << $SEQ_CHUNK_PWR ) + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 # my $max = ( $chunk_max + 1 ) << $SEQ_CHUNK_PWR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 my $seq = substr( $entire_seq, $start - $min, $length );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 return \$seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 # do not do any caching for requests of very large sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 my $sth =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 $self->prepare( "SELECT SUBSTRING(d.sequence, ?, ?) "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 . "FROM dna d "
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 . "WHERE d.seq_region_id = ?" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 my $tmp_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 $sth->bind_param( 1, $start, SQL_INTEGER );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 $sth->bind_param( 2, $length, SQL_INTEGER );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 $sth->bind_param( 3, $seq_region_id, SQL_INTEGER );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 $sth->execute();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 $sth->bind_columns(\$tmp_seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 $sth->fetch();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 $sth->finish();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 # always give back uppercased sequence so it can be properly softmasked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 $tmp_seq = uc($tmp_seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 return \$tmp_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 =head2 store
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 Arg [1] : int $seq_region_id the id of the sequence region this dna
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 will be associated with.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 Arg [2] : string $sequence the dna sequence to be stored
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 in the database. Note that the sequence passed in will be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 converted to uppercase.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 Example : $seq_adaptor->store(11, 'ACTGGGTACCAAACAAACACAACA');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 Description: stores a dna sequence in the databases dna table and returns the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 database identifier for the new record.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 Exceptions : throw if the database insert fails
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 Caller : sequence loading scripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 Status : Stable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 sub store {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 my ($self, $seq_region_id, $sequence) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 if(!$seq_region_id) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 throw('seq_region_id is required');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 $sequence = uc($sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 my $statement =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 $self->prepare("INSERT INTO dna(seq_region_id, sequence) VALUES(?,?)");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 $statement->bind_param(1,$seq_region_id,SQL_INTEGER);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 $statement->bind_param(2,$sequence,SQL_LONGVARCHAR);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 $statement->execute();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 $statement->finish();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 =head2 fetch_by_assembly_location
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 Description: DEPRECATED use fetch_by_Slice_start_end_strand() instead.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 sub fetch_by_assembly_location {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 my ( $self, $chrStart, $chrEnd,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 $strand, $chrName, $assemblyType ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 deprecate('Use fetch_by_Slice_start_end_strand() instead');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 my $csa = $self->db->get_CoordSystem();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 my $top_cs = @{$csa->fetch_all};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 my $slice_adaptor = $self->db->get_SliceAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 my $slice = $slice_adaptor->fetch_by_region($top_cs->name(), $chrName,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 $chrStart, $chrEnd,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 $strand, $top_cs->version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 return $self->fetch_by_Slice_start_end_strand($slice,1, $slice->length,1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 =head2 fetch_by_RawContig_start_end_strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 Description: DEPRECATED use fetch_by_Slice_start_end_strand instead
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 sub fetch_by_RawContig_start_end_strand {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 deprecate('Use fetch_by_Slice_start_end_strand instead.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 fetch_by_Slice_start_end_strand(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 1;