0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <dev@ensembl.org>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::EnsEMBL::LRGSlice - Arbitary Slice of a genome
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 $sa = $db->get_SliceAdaptor;
|
|
28
|
|
29 $slice =
|
|
30 $sa->fetch_by_region( 'LRG', 'LRG3');
|
|
31
|
|
32 # get some attributes of the slice
|
|
33 my $seqname = $slice->seq_region_name();
|
|
34 my $start = $slice->start();
|
|
35 my $end = $slice->end();
|
|
36
|
|
37 # get the sequence from the slice
|
|
38 my $seq = $slice->seq();
|
|
39
|
|
40 # get some features from the slice
|
|
41 foreach $gene ( @{ $slice->get_all_Genes } ) {
|
|
42 # do something with a gene
|
|
43 }
|
|
44
|
|
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
|
|
46 # do something with dna-dna alignments
|
|
47 }
|
|
48
|
|
49 =head1 DESCRIPTION
|
|
50
|
|
51 A LRG Slice object represents a region of a genome. It can be used to retrieve
|
|
52 sequence or features from an area of interest.
|
|
53
|
|
54 =head1 METHODS
|
|
55
|
|
56 =cut
|
|
57
|
|
58 package Bio::EnsEMBL::LRGSlice;
|
|
59 use vars qw(@ISA);
|
|
60 use strict;
|
|
61
|
|
62 use Bio::PrimarySeqI;
|
|
63
|
|
64 use Bio::EnsEMBL::Slice;
|
|
65
|
|
66 use vars qw(@ISA);
|
|
67
|
|
68 @ISA = qw(Bio::EnsEMBL::Slice);
|
|
69
|
|
70 sub new{
|
|
71 my $class = shift;
|
|
72
|
|
73 my $self = bless {}, $class ;
|
|
74
|
|
75 my $slice = $self = $class->SUPER::new( @_);
|
|
76
|
|
77 return $self;
|
|
78 }
|
|
79
|
|
80 sub stable_id {
|
|
81 my $self = shift;
|
|
82 return $self->seq_region_name;
|
|
83 }
|
|
84
|
|
85
|
|
86 sub display_xref {
|
|
87 my $self = shift;
|
|
88 return $self->seq_region_name;
|
|
89 }
|
|
90
|
|
91 sub feature_Slice {
|
|
92 my $self = shift;
|
|
93 return $self->{_chrom_slice} if defined($self->{_chrom_slice});
|
|
94
|
|
95 my $max=-99999999999;
|
|
96 my $min=9999999999;
|
|
97 my $chrom;
|
|
98 my $strand;
|
|
99
|
|
100 # print STDERR "working out feature slcie\n";
|
|
101 foreach my $segment (@{$self->project('chromosome')}) {
|
|
102 my $from_start = $segment->from_start();
|
|
103 my $from_end = $segment->from_end();
|
|
104 my $to_name = $segment->to_Slice->seq_region_name();
|
|
105 $chrom = $to_name;
|
|
106
|
|
107 my $to_start = $segment->to_Slice->start();
|
|
108 my $to_end = $segment->to_Slice->end();
|
|
109 if($to_start > $max){
|
|
110 $max = $to_start;
|
|
111 }
|
|
112 if($to_start < $min){
|
|
113 $min = $to_start;
|
|
114 }
|
|
115 if($to_end > $max){
|
|
116 $max = $to_end;
|
|
117 }
|
|
118 if($to_end < $min){
|
|
119 $min = $to_end;
|
|
120 }
|
|
121 my $ori = $segment->to_Slice->strand();
|
|
122 $strand = $ori;
|
|
123 }
|
|
124 if(!defined($chrom)){
|
|
125 warn "Could not project to chromosome for ".$self->name."??\n";
|
|
126 return undef;
|
|
127 }
|
|
128 my $chrom_slice = $self->adaptor->fetch_by_region("chromosome",$chrom, $min, $max, $strand);
|
|
129 $self->{_chrom_slice} = $chrom_slice;
|
|
130 return $self->{_chrom_slice};
|
|
131 }
|
|
132
|
|
133 sub DESTROY{
|
|
134 }
|
|
135
|
|
136 sub get_all_differences{
|
|
137 my $self = shift;
|
|
138
|
|
139 my @results;
|
|
140
|
|
141 # get seq_region_attrib diffs (always same-length substitutions)
|
|
142 ################################################################
|
|
143
|
|
144 my $sth = $self->adaptor->prepare(qq{
|
|
145 SELECT sra.value
|
|
146 FROM seq_region_attrib sra, attrib_type at
|
|
147 WHERE at.code = '_rna_edit'
|
|
148 AND at.attrib_type_id = sra.attrib_type_id
|
|
149 AND sra.seq_region_id = ?
|
|
150 });
|
|
151
|
|
152 $sth->execute($self->get_seq_region_id);
|
|
153
|
|
154 my $edit_string;
|
|
155
|
|
156 $sth->bind_columns(\$edit_string);
|
|
157
|
|
158 while($sth->fetch()) {
|
|
159 my ($start, $end, $edit) = split " ", $edit_string;
|
|
160
|
|
161 my $slice = $self->sub_Slice($start, $end);
|
|
162 my $chr_proj = $slice->project("chromosome");
|
|
163 my $ref_seq = '-';
|
|
164 if(scalar @$chr_proj == 1) {
|
|
165 $ref_seq = $chr_proj->[0]->[2]->seq;
|
|
166 }
|
|
167
|
|
168
|
|
169 my $diff = {
|
|
170 'start' => $start,
|
|
171 'end' => $end,
|
|
172 'type' => 'substitution',
|
|
173 'seq' => $edit,
|
|
174 'ref' => $ref_seq,
|
|
175 };
|
|
176
|
|
177 push @results, $diff;
|
|
178 }
|
|
179
|
|
180 # get more complex differences via projections
|
|
181 ##############################################
|
|
182
|
|
183 # project the LRG slice to contig coordinates
|
|
184 my @segs = @{$self->project("contig")};
|
|
185
|
|
186 # if the LRG projects into more than one segment
|
|
187 if(scalar @segs > 1) {
|
|
188
|
|
189 my ($prev_end, $prev_chr_start, $prev_chr_end, $prev_was_chr);
|
|
190
|
|
191 foreach my $seg(@segs) {
|
|
192
|
|
193 # is this a novel LRG contig, or does it project to a chromosome?
|
|
194 my @chr_proj = @{$seg->[2]->project("chromosome")};
|
|
195
|
|
196 # if it is a normal contig
|
|
197 if(scalar @chr_proj) {
|
|
198
|
|
199 # check if there has been a deletion in LRG
|
|
200 if($prev_was_chr && $prev_end == $seg->[0] - 1) {
|
|
201
|
|
202 # check it's not just a break in contigs
|
|
203 unless(
|
|
204 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_start == $chr_proj[0]->[2]->end + 1) ||
|
|
205 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_end == $chr_proj[0]->[2]->start - 1)
|
|
206 ) {
|
|
207
|
|
208 # now get deleted slice coords, depends on the strand rel to LRG
|
|
209 my ($s, $e);
|
|
210
|
|
211 # opposite strand
|
|
212 if($chr_proj[0]->[2]->strand != $self->strand) {
|
|
213 ($s, $e) = ($prev_chr_start - 1, $chr_proj[0]->[2]->end + 1);
|
|
214 }
|
|
215
|
|
216 # same strand
|
|
217 else {
|
|
218 ($s, $e) = ($prev_chr_end + 1, $chr_proj[0]->[2]->start - 1);
|
|
219 }
|
|
220
|
|
221 if($s > $e) {
|
|
222 warn "Oops, trying to create a slice from $s to $e (could have been ", $prev_chr_start - 1, "-", $chr_proj[0]->[2]->end + 1, " or ", $prev_chr_end + 1, "-", $chr_proj[0]->[2]->start - 1, ")";
|
|
223 }
|
|
224
|
|
225 else {
|
|
226 # get a slice representing the sequence that was deleted
|
|
227 my $deleted_slice = $self->adaptor->fetch_by_region("chromosome", $chr_proj[0]->[2]->seq_region_name, $s, $e);
|
|
228
|
|
229 my $diff = {
|
|
230 'start' => $seg->[0],
|
|
231 'end' => $prev_end,
|
|
232 'type' => 'deletion',
|
|
233 'seq' => '-',
|
|
234 'ref' => $deleted_slice->seq." ".$deleted_slice->start.'-'.$deleted_slice->end,
|
|
235 };
|
|
236
|
|
237 push @results, $diff;
|
|
238 }
|
|
239 }
|
|
240 }
|
|
241
|
|
242 $prev_was_chr = 1;
|
|
243
|
|
244 $prev_chr_start = $chr_proj[0]->[2]->start;
|
|
245 $prev_chr_end = $chr_proj[0]->[2]->end;
|
|
246 }
|
|
247
|
|
248 # if it is an LRG made-up contig for an insertion
|
|
249 else {
|
|
250 $prev_was_chr = 0;
|
|
251
|
|
252 my $diff = {
|
|
253 'start' => $seg->[0],
|
|
254 'end' => $seg->[1],
|
|
255 'type' => 'insertion',
|
|
256 'seq' => substr($self->seq, $seg->[0] - 1, $seg->[1] - $seg->[0] + 1),
|
|
257 'ref' => '-',
|
|
258 };
|
|
259
|
|
260 push @results, $diff;
|
|
261 }
|
|
262
|
|
263 $prev_end = $seg->[1];
|
|
264 }
|
|
265 }
|
|
266
|
|
267 # return results sorted by start, then end position
|
|
268 return [sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @results];
|
|
269 }
|
|
270
|
|
271 1;
|