comparison variant_effect_predictor/Bio/EnsEMBL/LRGSlice.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <dev@ensembl.org>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 =head1 NAME
22
23 Bio::EnsEMBL::LRGSlice - Arbitary Slice of a genome
24
25 =head1 SYNOPSIS
26
27 $sa = $db->get_SliceAdaptor;
28
29 $slice =
30 $sa->fetch_by_region( 'LRG', 'LRG3');
31
32 # get some attributes of the slice
33 my $seqname = $slice->seq_region_name();
34 my $start = $slice->start();
35 my $end = $slice->end();
36
37 # get the sequence from the slice
38 my $seq = $slice->seq();
39
40 # get some features from the slice
41 foreach $gene ( @{ $slice->get_all_Genes } ) {
42 # do something with a gene
43 }
44
45 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
46 # do something with dna-dna alignments
47 }
48
49 =head1 DESCRIPTION
50
51 A LRG Slice object represents a region of a genome. It can be used to retrieve
52 sequence or features from an area of interest.
53
54 =head1 METHODS
55
56 =cut
57
58 package Bio::EnsEMBL::LRGSlice;
59 use vars qw(@ISA);
60 use strict;
61
62 use Bio::PrimarySeqI;
63
64 use Bio::EnsEMBL::Slice;
65
66 use vars qw(@ISA);
67
68 @ISA = qw(Bio::EnsEMBL::Slice);
69
70 sub new{
71 my $class = shift;
72
73 my $self = bless {}, $class ;
74
75 my $slice = $self = $class->SUPER::new( @_);
76
77 return $self;
78 }
79
80 sub stable_id {
81 my $self = shift;
82 return $self->seq_region_name;
83 }
84
85
86 sub display_xref {
87 my $self = shift;
88 return $self->seq_region_name;
89 }
90
91 sub feature_Slice {
92 my $self = shift;
93 return $self->{_chrom_slice} if defined($self->{_chrom_slice});
94
95 my $max=-99999999999;
96 my $min=9999999999;
97 my $chrom;
98 my $strand;
99
100 # print STDERR "working out feature slcie\n";
101 foreach my $segment (@{$self->project('chromosome')}) {
102 my $from_start = $segment->from_start();
103 my $from_end = $segment->from_end();
104 my $to_name = $segment->to_Slice->seq_region_name();
105 $chrom = $to_name;
106
107 my $to_start = $segment->to_Slice->start();
108 my $to_end = $segment->to_Slice->end();
109 if($to_start > $max){
110 $max = $to_start;
111 }
112 if($to_start < $min){
113 $min = $to_start;
114 }
115 if($to_end > $max){
116 $max = $to_end;
117 }
118 if($to_end < $min){
119 $min = $to_end;
120 }
121 my $ori = $segment->to_Slice->strand();
122 $strand = $ori;
123 }
124 if(!defined($chrom)){
125 warn "Could not project to chromosome for ".$self->name."??\n";
126 return undef;
127 }
128 my $chrom_slice = $self->adaptor->fetch_by_region("chromosome",$chrom, $min, $max, $strand);
129 $self->{_chrom_slice} = $chrom_slice;
130 return $self->{_chrom_slice};
131 }
132
133 sub DESTROY{
134 }
135
136 sub get_all_differences{
137 my $self = shift;
138
139 my @results;
140
141 # get seq_region_attrib diffs (always same-length substitutions)
142 ################################################################
143
144 my $sth = $self->adaptor->prepare(qq{
145 SELECT sra.value
146 FROM seq_region_attrib sra, attrib_type at
147 WHERE at.code = '_rna_edit'
148 AND at.attrib_type_id = sra.attrib_type_id
149 AND sra.seq_region_id = ?
150 });
151
152 $sth->execute($self->get_seq_region_id);
153
154 my $edit_string;
155
156 $sth->bind_columns(\$edit_string);
157
158 while($sth->fetch()) {
159 my ($start, $end, $edit) = split " ", $edit_string;
160
161 my $slice = $self->sub_Slice($start, $end);
162 my $chr_proj = $slice->project("chromosome");
163 my $ref_seq = '-';
164 if(scalar @$chr_proj == 1) {
165 $ref_seq = $chr_proj->[0]->[2]->seq;
166 }
167
168
169 my $diff = {
170 'start' => $start,
171 'end' => $end,
172 'type' => 'substitution',
173 'seq' => $edit,
174 'ref' => $ref_seq,
175 };
176
177 push @results, $diff;
178 }
179
180 # get more complex differences via projections
181 ##############################################
182
183 # project the LRG slice to contig coordinates
184 my @segs = @{$self->project("contig")};
185
186 # if the LRG projects into more than one segment
187 if(scalar @segs > 1) {
188
189 my ($prev_end, $prev_chr_start, $prev_chr_end, $prev_was_chr);
190
191 foreach my $seg(@segs) {
192
193 # is this a novel LRG contig, or does it project to a chromosome?
194 my @chr_proj = @{$seg->[2]->project("chromosome")};
195
196 # if it is a normal contig
197 if(scalar @chr_proj) {
198
199 # check if there has been a deletion in LRG
200 if($prev_was_chr && $prev_end == $seg->[0] - 1) {
201
202 # check it's not just a break in contigs
203 unless(
204 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_start == $chr_proj[0]->[2]->end + 1) ||
205 ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_end == $chr_proj[0]->[2]->start - 1)
206 ) {
207
208 # now get deleted slice coords, depends on the strand rel to LRG
209 my ($s, $e);
210
211 # opposite strand
212 if($chr_proj[0]->[2]->strand != $self->strand) {
213 ($s, $e) = ($prev_chr_start - 1, $chr_proj[0]->[2]->end + 1);
214 }
215
216 # same strand
217 else {
218 ($s, $e) = ($prev_chr_end + 1, $chr_proj[0]->[2]->start - 1);
219 }
220
221 if($s > $e) {
222 warn "Oops, trying to create a slice from $s to $e (could have been ", $prev_chr_start - 1, "-", $chr_proj[0]->[2]->end + 1, " or ", $prev_chr_end + 1, "-", $chr_proj[0]->[2]->start - 1, ")";
223 }
224
225 else {
226 # get a slice representing the sequence that was deleted
227 my $deleted_slice = $self->adaptor->fetch_by_region("chromosome", $chr_proj[0]->[2]->seq_region_name, $s, $e);
228
229 my $diff = {
230 'start' => $seg->[0],
231 'end' => $prev_end,
232 'type' => 'deletion',
233 'seq' => '-',
234 'ref' => $deleted_slice->seq." ".$deleted_slice->start.'-'.$deleted_slice->end,
235 };
236
237 push @results, $diff;
238 }
239 }
240 }
241
242 $prev_was_chr = 1;
243
244 $prev_chr_start = $chr_proj[0]->[2]->start;
245 $prev_chr_end = $chr_proj[0]->[2]->end;
246 }
247
248 # if it is an LRG made-up contig for an insertion
249 else {
250 $prev_was_chr = 0;
251
252 my $diff = {
253 'start' => $seg->[0],
254 'end' => $seg->[1],
255 'type' => 'insertion',
256 'seq' => substr($self->seq, $seg->[0] - 1, $seg->[1] - $seg->[0] + 1),
257 'ref' => '-',
258 };
259
260 push @results, $diff;
261 }
262
263 $prev_end = $seg->[1];
264 }
265 }
266
267 # return results sorted by start, then end position
268 return [sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @results];
269 }
270
271 1;